In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# Load the dataset
file_path = "/content/synthetic_ev_fleet_extended_dataset.csv"
df = pd.read_csv(file_path)

# Choose the target variable
target = 'Vehicle Status'
features = [col for col in df.columns if col != target]

# Encode the target variable
label_encoder = LabelEncoder()
df[target] = label_encoder.fit_transform(df[target])

# Encode categorical features
categorical_features = df.select_dtypes(include=['object']).columns
df = pd.get_dummies(df, columns=categorical_features, drop_first=True)

# Split data into features (X) and target (y)
X = df.drop(columns=[target])
y = df[target]

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features (optional for Logistic Regression but improves performance)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train a Logistic Regression model
model = LogisticRegression(max_iter=1000, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

# Classification report
print("\nClassification Report:\n", classification_report(y_test, y_pred))


In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Load the dataset
file_path = "/content/synthetic_ev_fleet_extended_dataset.csv"
data = pd.read_csv(file_path)

# Step 1: Feature Engineering
# Compute "Cost per Meter"
data['Cost per Meter'] = data['Maintenance Cost ($)'] / data['Electric Range (km)']

# Select relevant features
features = [
    'Battery Capacity (kWh)', 'Top Speed (km/h)', 'Electric Range (km)',
    'Fast Charge Speed (kW)', 'Amount for Charging ($)'
]
target = 'Cost per Meter'

# Drop rows with missing values
data = data.dropna(subset=features + [target])

# Step 2: Prepare data for training
X = data[features]
y = data[target]

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 3: Build the Neural Network
model = Sequential([
    Dense(64, activation='relu', input_dim=X_train_scaled.shape[1]),
    Dense(32, activation='relu'),
    Dense(1)  # Output layer for regression
])

model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Step 4: Train the Model
history = model.fit(X_train_scaled, y_train, epochs=100, batch_size=32, validation_split=0.2, verbose=1)

# Step 5: Evaluate the Model
loss, mae = model.evaluate(X_test_scaled, y_test, verbose=0)
print(f"Mean Absolute Error on Test Data: {mae}")

# Step 6: Make Predictions
predictions = model.predict(X_test_scaled)

# Save predictions to a CSV file
results = X_test.copy()
results['Actual Cost per Meter'] = y_test
results['Predicted Cost per Meter'] = predictions
results.to_csv('/mnt/data/cost_per_meter_predictions.csv', index=False)

print("Predictions saved to '/mnt/data/cost_per_meter_predictions.csv'")


Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2ms/step - loss: 5.3280 - mae: 1.8092 - val_loss: 4.0217 - val_mae: 1.6361
Epoch 2/100
[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 4.0711 - mae: 1.6418 - val_loss: 4.0188 - val_mae: 1.6336
Epoch 3/100
[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 4.0738 - mae: 1.6430 - val_loss: 4.0276 - val_mae: 1.6359
Epoch 4/100
[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - loss: 4.0348 - mae: 1.6342 - val_loss: 4.0078 - val_mae: 1.6310
Epoch 5/100
[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 4.0579 - mae: 1.6411 - val_loss: 4.0022 - val_mae: 1.6297
Epoch 6/100
[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 4.0272 - mae: 1.6335 - val_loss: 3.9940 - val_mae: 1.6295
Epoch 7/100
[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 2m

OSError: Cannot save file into a non-existent directory: '/mnt/data'

In [5]:
# Save predictions to a CSV file in the Colab environment
results.to_csv('/content/cost_per_meter_predictions.csv', index=False)
print("Predictions saved to '/content/cost_per_meter_predictions.csv'")


Predictions saved to '/content/cost_per_meter_predictions.csv'


In [6]:
# Display the first few rows of the results DataFrame
results.head()



Unnamed: 0,Battery Capacity (kWh),Top Speed (km/h),Electric Range (km),Fast Charge Speed (kW),Amount for Charging ($),Actual Cost per Meter,Predicted Cost per Meter
75721,75,115,420,107,31.31,0.322833,2.571744
80184,70,163,414,122,21.56,1.327657,2.646835
19864,94,179,297,132,10.09,1.239865,3.413599
76699,109,127,211,55,32.26,4.035308,5.436848
92991,118,129,218,55,22.85,1.894908,5.332797
