In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.pipeline import make_pipeline

In [4]:
# Data Loading and Preprocessing
# Read the CSV, parse its 'timestamp' column to datetimes, and use that
# column as the row index (it is removed from the regular columns).
file_path = 'CarSharing_Preprocessed.csv'
df = pd.read_csv(file_path)
df = df.assign(timestamp=pd.to_datetime(df['timestamp'])).set_index('timestamp')

In [5]:
# Feature Engineering
# Replace each listed column with integer codes. A single encoder is re-fit
# per column, so after the loop it only holds the mapping for the last one.
label_encoder = LabelEncoder()
for encoded_column in ('season', 'holiday', 'workingday', 'weather'):
    df[encoded_column] = label_encoder.fit_transform(df[encoded_column])

# Calendar features taken from the datetime index: new column name -> index attribute.
for feature_name, index_attribute in (
    ('hour', 'hour'),
    ('day_of_week', 'dayofweek'),
    ('month', 'month'),
):
    df[feature_name] = getattr(df.index, index_attribute)

# Target Variable
# Label is 1 when demand is strictly above the mean demand, otherwise 2.
average_demand = df['demand'].mean()
df['demand_category'] = (df['demand'] > average_demand).map({True: 1, False: 2})


# Data Preparation for Model Training
# Features are every column except 'id', the raw 'demand', and the label
# derived from it; the label column is the target.
X = df.drop(columns=['id', 'demand', 'demand_category'])
y = df['demand_category']

# 70/30 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


# Model Initialisation and Pipeline Creation
# KNN is wrapped in a pipeline so that features are standardised before the
# neighbour search; the tree-based models are used on the raw features.
knn = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=5))
# Both tree-based estimators are randomised. Seeding them (with the same seed
# as the train/test split) makes the reported accuracies reproducible on a
# fresh Restart & Run All instead of drifting from run to run.
dt = DecisionTreeClassifier(random_state=42)
rf = RandomForestClassifier(n_estimators=100, random_state=42)
# Emit blank lines ahead of the accuracy report printed further down.
print("\n")


# Model Training and Evaluation
# Display name -> estimator, in the order the results are reported.
classifiers = dict([
    ('K-Nearest Neighbors', knn),
    ('Decision Tree', dt),
    ('Random Forest', rf),
])

for name, clf in classifiers.items():
    # fit() returns the fitted estimator, so predicting on the held-out
    # rows can be chained directly onto it.
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    # Fraction of test rows whose predicted label matches the true label.
    accuracy = accuracy_score(y_test, y_pred)
    print(f'{name} Accuracy: {accuracy:.4f}')




K-Nearest Neighbors Accuracy: 0.8458
Decision Tree Accuracy: 0.8860
Random Forest Accuracy: 0.9204
