In [None]:
import pandas as pd
import numpy as np

# Seed NumPy's legacy global random generator so the np.random draws in this notebook are reproducible.
np.random.seed(42)


In [None]:
# Number of synthetic samples to generate; used as the length of every feature array.
num_samples = 100

In [None]:
# Draw one uniform random array per feature, each of length num_samples.
# The call order is kept fixed: every draw advances the shared global NumPy
# random state, so reordering would change the generated values.
carbon_emissions = np.random.uniform(low=50, high=400, size=num_samples)
energy_output = np.random.uniform(low=100, high=1000, size=num_samples)
renewability_index = np.random.uniform(low=0, high=1, size=num_samples)
cost_efficiency = np.random.uniform(low=1, high=10, size=num_samples)


In [None]:
# Binary label per sample: 1 only when emissions are below 200, the
# renewability index is above 0.5 and cost efficiency is below 5; otherwise 0.
# energy_output plays no part in the label.
sustainability = [
    int(co2 < 200 and renew > 0.5 and cost_eff < 5)
    for co2, renew, cost_eff in zip(carbon_emissions, renewability_index, cost_efficiency)
]

In [None]:
# Gather the four feature arrays and the label list into one mapping;
# keyword order determines the column order of the resulting table.
data = dict(
    carbon_emissions=carbon_emissions,
    energy_output=energy_output,
    renewability_index=renewability_index,
    cost_efficiency=cost_efficiency,
    sustainability=sustainability,
)

# One row per sample, one column per variable.
df = pd.DataFrame(data)

# Plain-text preview of the first five rows.
print(df.head())



   carbon_emissions  energy_output  renewability_index  cost_efficiency  \
0        181.089042     128.286267            0.642032         1.465135   
1        382.750007     672.769370            0.084140         5.782192   
2        306.197880     382.920383            0.161629         5.865716   
3        259.530469     557.713622            0.898554         6.736869   
4        104.606524     916.809827            0.606429         7.534822   

   sustainability  
0               1  
1               0  
2               0  
3               0  
4               0  


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Inspect the frame before imputing: preview rows and per-column missing-value counts.
print(df.head())
print(df.isnull().sum())

# Replace any missing value with its column mean.
# fillna is used without inplace=True so it returns a new frame: `df` is left
# untouched (previously `data` was just an alias of `df`, so the in-place fill
# silently mutated `df` too) and re-running this cell always starts from `df`.
data = df.fillna(df.mean())

   carbon_emissions  energy_output  renewability_index  cost_efficiency  \
0        181.089042     128.286267            0.642032         1.465135   
1        382.750007     672.769370            0.084140         5.782192   
2        306.197880     382.920383            0.161629         5.865716   
3        259.530469     557.713622            0.898554         6.736869   
4        104.606524     916.809827            0.606429         7.534822   

   sustainability  
0               1  
1               0  
2               0  
3               0  
4               0  
carbon_emissions      0
energy_output         0
renewability_index    0
cost_efficiency       0
sustainability        0
dtype: int64


In [None]:
# Feature matrix (four numeric columns) and binary target for the classifier.
# The matrix is bound to uppercase `X` because the train/test split cell reads
# `X`; with only the lowercase name defined, that cell raised NameError on a
# fresh kernel (it only worked through leftover kernel state).
X = data[['carbon_emissions', 'energy_output', 'renewability_index', 'cost_efficiency']]
x = X  # lowercase alias kept for any code that still refers to `x`
y = data['sustainability']

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# partition repeatable across runs.
(X_train, X_test, y_train, y_test) = train_test_split(X, y, random_state=42, test_size=0.2)

# Report the (rows, columns) shape of each feature partition.
print("Training data shape:", X_train.shape)
print("Testing data shape:", X_test.shape)


Training data shape: (80, 4)
Testing data shape: (20, 4)


In [None]:
# Fit a logistic-regression classifier on the training split
# (fit() returns the estimator itself, so construction and fitting are chained).
model = LogisticRegression(random_state=42).fit(X_train, y_train)

# Predict on the held-out rows and report the fraction classified correctly.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy}")

Accuracy: 0.95
