In [1]:
pip install numpy pandas tensorflow scikit-learn



In [13]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import random
from sklearn.preprocessing import StandardScaler, LabelEncoder
from collections import deque


file_path = "/content/Updated_Prices_Dataset__Rounded_.csv"
df = pd.read_csv(file_path)

feature_columns = ["location", "start_hour", "end_hour", "charging_duration", "day_of_week"]
target_column = "price"


max_price = df["price"].max()
df["price"] = df["price"] / max_price


label_encoders = {}
for col in ["location", "day_of_week"]:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le


scaler = StandardScaler()
df[["start_hour", "end_hour", "charging_duration"]] = scaler.fit_transform(df[["start_hour", "end_hour", "charging_duration"]])


X = df[feature_columns].values
y = df[target_column].values


class DQN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, output_dim)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

# Hyperparameters
input_dim = X.shape[1]  # Number of features
output_dim = 1  # Predicting price (single output)
learning_rate = 0.001
gamma = 0.99  # Discount factor for rewards
epsilon = 1.0  # Initial exploration rate
epsilon_decay = 0.995
epsilon_min = 0.01
batch_size = 32
memory_size = 10000
num_episodes = 500

dqn = DQN(input_dim, output_dim)
optimizer = optim.Adam(dqn.parameters(), lr=learning_rate)
loss_fn = nn.MSELoss()
memory = deque(maxlen=memory_size)


for episode in range(num_episodes):
    idx = random.randint(0, len(X) - 1)
    state = torch.tensor(X[idx], dtype=torch.float32)
    actual_price = y[idx]


    if random.random() < epsilon:
        action = random.uniform(0, 1)
    else:
        with torch.no_grad():
            action = dqn(state).item()


    reward = -abs(actual_price - action)


    memory.append((state, action, reward))
    loss=0
    # Sample batch and train
    if len(memory) > batch_size:
        batch = random.sample(memory, batch_size)
        states, actions, rewards = zip(*batch)

        states = torch.stack(states)
        actions = torch.tensor(actions, dtype=torch.float32).unsqueeze(1)
        rewards = torch.tensor(rewards, dtype=torch.float32).unsqueeze(1)

        predictions = dqn(states)
        loss = loss_fn(predictions, actions)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Decay epsilon
    epsilon = max(epsilon_min, epsilon * epsilon_decay)

    if isinstance(loss, torch.Tensor):
            print(f"Episode [{episode}/{num_episodes}], Loss: {loss.item():.4f}")
    else:
            print(f"Episode [{episode}/{num_episodes}], Loss: {loss:.4f}")  # Print loss as a float if it's not a tensor

sample_input = np.array([[3, 14, 16, 1.5, 4]])  # Example: location 3, 2 PM-4 PM, 1.5 hr, Thursday
sample_df = pd.DataFrame(sample_input, columns=feature_columns)
sample_df[["start_hour", "end_hour", "charging_duration"]] = scaler.transform(sample_df[["start_hour", "end_hour", "charging_duration"]])

for col in ["location", "day_of_week"]:
    sample_df[col] = sample_df[col].apply(lambda x: label_encoders[col].classes_[0] if x not in label_encoders[col].classes_ else x)
    sample_df[col] = label_encoders[col].transform(sample_df[col])

sample_tensor = torch.tensor(sample_df.values, dtype=torch.float32)
predicted_price = dqn(sample_tensor).item() * max_price  # Convert back

print(f"\nPredicted Price: ₹{predicted_price:.2f}")


Episode [0/500], Loss: 0.0000
Episode [1/500], Loss: 0.0000
Episode [2/500], Loss: 0.0000
Episode [3/500], Loss: 0.0000
Episode [4/500], Loss: 0.0000
Episode [5/500], Loss: 0.0000
Episode [6/500], Loss: 0.0000
Episode [7/500], Loss: 0.0000
Episode [8/500], Loss: 0.0000
Episode [9/500], Loss: 0.0000
Episode [10/500], Loss: 0.0000
Episode [11/500], Loss: 0.0000
Episode [12/500], Loss: 0.0000
Episode [13/500], Loss: 0.0000
Episode [14/500], Loss: 0.0000
Episode [15/500], Loss: 0.0000
Episode [16/500], Loss: 0.0000
Episode [17/500], Loss: 0.0000
Episode [18/500], Loss: 0.0000
Episode [19/500], Loss: 0.0000
Episode [20/500], Loss: 0.0000
Episode [21/500], Loss: 0.0000
Episode [22/500], Loss: 0.0000
Episode [23/500], Loss: 0.0000
Episode [24/500], Loss: 0.0000
Episode [25/500], Loss: 0.0000
Episode [26/500], Loss: 0.0000
Episode [27/500], Loss: 0.0000
Episode [28/500], Loss: 0.0000
Episode [29/500], Loss: 0.0000
Episode [30/500], Loss: 0.0000
Episode [31/500], Loss: 0.0000
Episode [32/500], 

In [9]:
sample_input = np.array([[3, 15, 16, 2, 4]])  # Example: location 3, 2 PM-4 PM, 1.5 hr, Thursday
sample_df = pd.DataFrame(sample_input, columns=feature_columns)
sample_df[["start_hour", "end_hour", "charging_duration"]] = scaler.transform(sample_df[["start_hour", "end_hour", "charging_duration"]])

# ✅ Handle unseen labels safely
for col in ["location", "day_of_week"]:
    sample_df[col] = sample_df[col].apply(lambda x: label_encoders[col].classes_[0] if x not in label_encoders[col].classes_ else x)
    sample_df[col] = label_encoders[col].transform(sample_df[col])

sample_tensor = torch.tensor(sample_df.values, dtype=torch.float32)
predicted_price = dqn(sample_tensor).item() * max_price  # Convert back

print(f"\nPredicted Price: ₹{predicted_price:.2f}")



Predicted Price: ₹11.62


In [5]:
# prompt: save model

import numpy as np
# Save the trained model
torch.save(dqn.state_dict(), 'dqn_model.pth')

# Save other necessary components for inference
import joblib
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(label_encoders, 'label_encoders.pkl')
np.save('max_price.npy', max_price)
