In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.cluster import KMeans, DBSCAN
from sklearn.preprocessing import LabelEncoder
import pickle

# Train and persist four models on the traffic dataset:
#   * LinearRegression and RandomForestClassifier (supervised, target is the
#     label-encoded 'Traffic Situation' column)
#   * KMeans and DBSCAN (unsupervised, fitted on the full feature matrix)
# Each fitted model is written to its own pickle file in the working directory.

# Load the dataset
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
data = pd.read_csv(r'C:\Users\user\Downloads\Traffic (1).csv')

# Data Preprocessing
# Convert 'Time' column (12-hour clock strings, parsed with '%I:%M:%S %p')
# to hour of the day
data['Hour'] = pd.to_datetime(data['Time'], format='%I:%M:%S %p').dt.hour

# Convert 'Date' column to day of the year. Unparseable dates are coerced to
# missing values, which the fillna(0) below turns into 0.
data['Date'] = pd.to_datetime(data['Date'], errors='coerce').dt.dayofyear

# Label encode 'Day of the week' column. The fitted encoder is kept so the
# integer codes can be mapped back to day names via inverse_transform.
day_encoder = LabelEncoder()
data['Day of the week'] = day_encoder.fit_transform(data['Day of the week'])

# Print unique values before and after encoding to ensure it's correct
print("Before encoding Traffic Situation:", data['Traffic Situation'].unique())

# Cast to str BEFORE encoding (guards against mixed types) and encode exactly
# once. Re-encoding the stringified integer codes would sort them as text
# ('10' < '2') and silently permute the codes once there are more than 10
# classes. The fitted encoder is kept so predictions can be decoded back to
# the original labels with situation_encoder.inverse_transform.
situation_encoder = LabelEncoder()
data['Traffic Situation'] = situation_encoder.fit_transform(
    data['Traffic Situation'].astype(str)
)
print("After encoding Traffic Situation:", data['Traffic Situation'].unique())

# Check for missing values and fill them
data = data.fillna(0)

# Verify the data types
print(data.dtypes)

# Features and target
feature_columns = [
    'Hour', 'Date', 'Day of the week',
    'CarCount', 'BikeCount', 'BusCount', 'TruckCount',
]
X = data[feature_columns]
y = data['Traffic Situation'].astype(float)

# Train-Test Split
# NOTE(review): X_test / y_test are produced but never used for evaluation here.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Supervised learning model 1: Linear Regression
# NOTE(review): this regresses on the encoded class codes as if they were a
# continuous quantity; the codes follow sorted label order, not severity.
lr = LinearRegression()
lr.fit(X_train, y_train)

# Supervised learning model 2: Random Forest Classifier
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Unsupervised learning model 1: KMeans
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(X)

# Unsupervised learning model 2: DBSCAN
# NOTE(review): features are not scaled, so eps=0.5 is measured in raw
# count/hour units; confirm this radius is intended.
dbscan = DBSCAN(eps=0.5, min_samples=5)
dbscan.fit(X)

# Save models with pickle (one file per model)
with open('lr_model.pkl', 'wb') as f:
    pickle.dump(lr, f)

with open('rf_model.pkl', 'wb') as f:
    pickle.dump(rf, f)

with open('kmeans_model.pkl', 'wb') as f:
    pickle.dump(kmeans, f)

with open('dbscan_model.pkl', 'wb') as f:
    pickle.dump(dbscan, f)

print("Models have been trained and saved successfully.")




Before encoding Traffic Situation: ['low' 'normal' 'heavy' 'high']
After encoding Traffic Situation: [2 3 0 1]
Time                 object
Date                  int64
Day of the week       int32
CarCount              int64
BikeCount             int64
BusCount              int64
TruckCount            int64
Total                 int64
Traffic Situation     int32
Hour                  int64
dtype: object




Models have been trained and saved successfully.
