In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import joblib
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load Dataset
file_path = "Dataset/Cleaned_bangalore_parking.csv"
df = pd.read_csv(file_path)

# Display first few rows to understand the structure
print(df.head())
print(df.info())  # Check for missing values


                      Name   Latitude  Longitude timestamp
0  Whitefield TTMC Parking  12.976810  77.726878  11:51:00
1                  Unnamed  12.914445  77.637000  17:57:00
2                  Unnamed  12.884377  77.726552  09:17:00
3                  Unnamed  12.893634  77.600952  21:08:00
4                  Unnamed  12.893036  77.601510  08:33:16
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 761 entries, 0 to 760
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Name       761 non-null    object 
 1   Latitude   761 non-null    float64
 2   Longitude  761 non-null    float64
 3   timestamp  761 non-null    object 
dtypes: float64(2), object(2)
memory usage: 23.9+ KB
None


In [4]:
import pandas as pd

# Function to convert timestamp to minutes
def time_to_minutes(time_str):
    try:
        time_obj = pd.to_datetime(time_str, errors="coerce")  # Convert to datetime
        if pd.isnull(time_obj):  # Handle invalid values
            return None
        return time_obj.hour * 60 + time_obj.minute  # Convert to total minutes
    except Exception as e:
        print(f"Error processing time: {time_str} -> {e}")
        return None

# Load CSV
df = pd.read_csv("Dataset/Cleaned_bangalore_parking.csv")

# Apply the function to convert timestamp
df["Time_Minutes"] = df["timestamp"].apply(time_to_minutes)

# Drop original timestamp column (optional)
df.drop(columns=["timestamp"], inplace=True)

# Show sample data
print(df.head())


                      Name   Latitude  Longitude  Time_Minutes
0  Whitefield TTMC Parking  12.976810  77.726878           711
1                  Unnamed  12.914445  77.637000          1077
2                  Unnamed  12.884377  77.726552           557
3                  Unnamed  12.893634  77.600952          1268
4                  Unnamed  12.893036  77.601510           513


In [9]:
import pandas as pd
import numpy as np

# Load dataset (all rows are demand zones)
df = pd.read_csv("Dataset/Cleaned_bangalore_parking.csv")

# Convert timestamp to minutes
def time_to_minutes(time_str):
    time_obj = pd.to_datetime(time_str, errors="coerce")
    return None if pd.isnull(time_obj) else time_obj.hour * 60 + time_obj.minute

df["Time_Minutes"] = df["timestamp"].apply(time_to_minutes)
df.drop(columns=["timestamp"], inplace=True)

# Mark demand presence as 1 (since all rows are demand zones)
df["demand_presence"] = 1

# Generate non-demand samples by shifting time to non-peak hours
num_fake_samples = len(df)  # Same number of non-demand samples

# Define peak hours (modify based on data insights)
peak_start = 8 * 60  # 8:00 AM in minutes
peak_end = 11 * 60  # 11:00 AM in minutes

fake_times = np.random.choice(
    list(range(0, peak_start)) + list(range(peak_end, 1440)),  # Select only non-peak hours
    num_fake_samples
)

df_fake = df.copy()
df_fake["Time_Minutes"] = fake_times  # Assign non-peak times
df_fake["demand_presence"] = 0  # Mark as non-demand

# Combine real and fake samples
df_combined = pd.concat([df, df_fake], ignore_index=True)

# Shuffle dataset
df_combined = df_combined.sample(frac=1, random_state=42).reset_index(drop=True)

# Save new dataset
df_combined.to_csv("Dataset/Demand_Training_Data.csv", index=False)

print("Balanced dataset created with demand and non-demand zones!")


Balanced dataset created with demand and non-demand zones!


In [10]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load balanced dataset
df = pd.read_csv("Dataset/Demand_Training_Data.csv")

# Define features and target
X = df[["Latitude", "Longitude", "Time_Minutes"]]
y = df["demand_presence"]

# Split into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Test the model
y_pred = model.predict(X_test)

# Evaluate performance
print("Model Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Model Accuracy: 0.4491803278688525
              precision    recall  f1-score   support

           0       0.45      0.44      0.44       154
           1       0.45      0.46      0.45       151

    accuracy                           0.45       305
   macro avg       0.45      0.45      0.45       305
weighted avg       0.45      0.45      0.45       305



In [11]:
import pandas as pd
import numpy as np

# Function to convert time (HH:MM) to minutes
def time_to_minutes(time_str):
    h, m = map(int, time_str.split(":"))
    return h * 60 + m

# Function to predict demand for a given location & time
def predict_demand(latitude, longitude, time_str, model):
    # Convert time to minutes
    time_minutes = time_to_minutes(time_str)

    # Create input data for prediction
    input_data = np.array([[latitude, longitude, time_minutes]])

    # Predict using trained model
    prediction = model.predict(input_data)

    # Return result
    return "Demand Zone" if prediction[0] == 1 else "Non-Demand Zone"


In [12]:
# Example test locations & times
test_locations = [
    {"latitude": 12.9716, "longitude": 77.5946, "time": "09:30"},  # Peak time
    {"latitude": 12.9611, "longitude": 77.6387, "time": "14:00"},  # Off-peak
    {"latitude": 13.0358, "longitude": 77.5970, "time": "18:15"},  # Evening rush
]

# Check demand prediction for each test case
for loc in test_locations:
    result = predict_demand(loc["latitude"], loc["longitude"], loc["time"], model)
    print(f"Location ({loc['latitude']}, {loc['longitude']}) at {loc['time']} → {result}")


Location (12.9716, 77.5946) at 09:30 → Demand Zone
Location (12.9611, 77.6387) at 14:00 → Non-Demand Zone
Location (13.0358, 77.597) at 18:15 → Demand Zone




In [13]:
import joblib

# Save the trained model
joblib.dump(model, "demand_prediction_model2.pkl")
print("Model saved successfully as demand_prediction_model.pkl")


Model saved successfully as demand_prediction_model.pkl


In [None]:
import joblib

# Load the trained model
model = joblib.load("demand_prediction_model2.pkl")
print("Model loaded successfully!")

import pandas as pd

# Example test data (latitude, longitude, and time in minutes)
test_data = pd.DataFrame({
    "Latitude": [12.9716],  # Example location in Bangalore
    "Longitude": [77.5946],
    "Time_Minutes": [9 * 60 + 30]  # 9:30 AM converted to minutes
})

# Make predictions
prediction = model.predict(test_data)

# Show result
print("Demand", "High" if prediction[0] == 1 else "Low")



Model loaded successfully!
Prediction: Demand Zone


In [15]:

import joblib

# Load the trained model
model = joblib.load("demand_prediction_model2.pkl")
print("Model loaded successfully!")

import pandas as pd

# Example test data (latitude, longitude, and time in minutes)
test_data = pd.DataFrame({
    "Latitude": [12.9611],  # Example location in Bangalore
    "Longitude": [77.6387],
    "Time_Minutes": [14 * 60 + 30]  # 9:30 AM converted to minutes
})

# Make predictions
prediction = model.predict(test_data)

# Show result
print("Prediction:", "Demand Zone" if prediction[0] == 1 else "No Demand Zone")



Model loaded successfully!
Prediction: No Demand Zone
