In [2]:
# Import necessary libraries
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load dataset
file_path = 'training_data_fall2024.csv'
data = pd.read_csv(file_path)

# Preprocessing steps
# Columns that are standardized (zero mean, unit variance) further below
numerical_features = [
    'hour_of_day', 'day_of_week', 'month', 'temp', 'dew', 'humidity',
    'precip', 'snow', 'snowdepth', 'windspeed', 'cloudcover', 'visibility'
]

# One-hot encode binary categorical features
data_encoded = pd.get_dummies(data, columns=['holiday', 'weekday', 'summertime'], drop_first=True)

# Encode the target variable as binary: 1 for 'high_bike_demand', 0 for every other value
data_encoded['increase_stock'] = data_encoded['increase_stock'].eq('high_bike_demand').astype(int)

# Features (X) and target (y)
X = data_encoded.drop(columns=['increase_stock'])
y = data_encoded['increase_stock']

# Split into train and test datasets (stratified so both splits keep the class ratio)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Standardize numerical features.
# The scaler is fitted on the training split only and then applied unchanged to the
# test split. Fitting it on the full dataset before splitting would let test-set
# means/variances influence the training features (data leakage).
scaler = StandardScaler()
X_train[numerical_features] = scaler.fit_transform(X_train[numerical_features])
X_test[numerical_features] = scaler.transform(X_test[numerical_features])

# Define the parameter grid
param_grid = {
    'C': [0.1, 1, 10],  # Inverse of regularization strength (smaller C = stronger regularization)
    'solver': ['liblinear', 'lbfgs'],  # Solvers
}

In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Read the raw training data from disk
file_path = 'training_data_fall2024.csv'
data = pd.read_csv(file_path)

# Columns that get standardized once the data has been split
numerical_features = [
    'hour_of_day', 'day_of_week', 'month',
    'temp', 'dew', 'humidity',
    'precip', 'snow', 'snowdepth',
    'windspeed', 'cloudcover', 'visibility',
]

# Expand the binary categorical columns into indicator columns,
# dropping the first level of each
binary_features = ['holiday', 'weekday', 'summertime']
data_encoded = pd.get_dummies(data, columns=binary_features, drop_first=True)

# Binary target: 1 where the label is 'high_bike_demand', 0 for anything else
data_encoded['increase_stock'] = (
    data_encoded['increase_stock'].eq('high_bike_demand').astype('int64')
)

# Separate the target column from the feature matrix
y = data_encoded['increase_stock']
X = data_encoded.drop(columns='increase_stock')

# Stratified 70/30 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

# Learn the scaling statistics from the training split only ...
scaler = StandardScaler().fit(X_train[numerical_features])

# ... and apply that same fitted transformation to both splits
for split_frame in (X_train, X_test):
    split_frame[numerical_features] = scaler.transform(split_frame[numerical_features])

# Hyperparameter values to search over
param_grid = dict(
    C=[0.1, 1, 10],  # inverse of regularization strength (smaller = stronger penalty)
    solver=['liblinear', 'lbfgs'],  # candidate optimizers
)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Base estimator; C and solver are supplied by the parameter grid during the search
logistic = LogisticRegression(max_iter=1000, random_state=42)

# Search every combination in param_grid with 5-fold cross-validation, ranked by F1
grid_search = GridSearchCV(
    estimator=logistic,
    param_grid=param_grid,
    scoring='f1',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Report the winning configuration and keep the corresponding fitted model
print("Best Parameters:", grid_search.best_params_)
best_logistic = grid_search.best_estimator_

# Class predictions for the held-out test split
y_pred = best_logistic.predict(X_test)

# Define evaluation function
def evaluate_model(y_true, y_pred):
    """Compute standard classification metrics for a set of predictions.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, aligned with ``y_true``.

    Returns
    -------
    dict
        Scores keyed by "Accuracy", "Precision", "Recall" and "F1-Score",
        in that order. Precision, recall and F1 are computed with
        ``zero_division=0``.
    """
    scores = {"Accuracy": accuracy_score(y_true, y_pred)}
    # These three scorers share a signature, including the zero_division argument
    for label, scorer in (
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1-Score", f1_score),
    ):
        scores[label] = scorer(y_true, y_pred, zero_division=0)
    return scores

# Score the tuned model's predictions against the held-out labels
logistic_metrics = evaluate_model(y_true=y_test, y_pred=y_pred)

# Show the metrics as a one-row table labelled with the model name
logistic_results = pd.DataFrame(
    data=[logistic_metrics],
    index=["Logistic Regression"],
)
print(logistic_results)