In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression

# Load the data
# Location of the ride-level CSV; replace with your actual file path.
DATA_PATH = 'C:/Users/vbodavul/Documents/Regression/updated_bike_sharing_data_with_distance.csv'
df = pd.read_csv(DATA_PATH)

# Impute missing numeric values with their column means.
# numeric_only=True is required because the frame still holds string columns
# at this point (rideable_type, station names, member_casual): pandas >= 2.0
# raises TypeError on a bare df.mean() for such frames, whereas older versions
# skipped the non-numeric columns. Non-numeric columns are left untouched.
# NOTE(review): the means are computed on the full data set, i.e. before the
# train/test split — confirm that this leakage is acceptable.
df = df.fillna(df.mean(numeric_only=True))

# Convert 'rideable_type' to a binary variable, 1 for electric and 0 for normal
df['is_electric'] = (df['rideable_type'] == 'electric_bike').astype(int)

# Convert the remaining categorical variables to dummy/indicator variables.
# NOTE(review): none of the resulting indicator columns appear in the feature
# list below, and one column is created per distinct station name, so this can
# widen the frame considerably — consider dropping this step if no later cell
# relies on it.
df = pd.get_dummies(df, columns=['start_station_name', 'end_station_name', 'member_casual'])

# Select your features (X) and target variable (y)
FEATURE_COLUMNS = [
    'start_lat',
    'start_lng',
    'end_lat',
    'end_lng',
    'Start_altitude',
    'End_altitude',
]  # Add more features if necessary
X = df[FEATURE_COLUMNS]
y = df['is_electric']


In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Add a constant to the model (intercept).
# has_constant='add' forces the intercept column to be appended to both
# matrices. The default ('skip') leaves a matrix unchanged when it already
# contains a constant column, so a feature with zero variance in only one of
# the splits would give the train and test designs different column sets and
# make prediction on the test set fail.
X_train_sm = sm.add_constant(X_train, has_constant='add')
X_test_sm = sm.add_constant(X_test, has_constant='add')

# Fit an ordinary least squares model on the training split.
# NOTE(review): y is a 0/1 indicator, so this is a linear probability model
# and its fitted values are not confined to [0, 1].
lm = sm.OLS(y_train, X_train_sm).fit()
print(lm.summary())


In [None]:
# Training split: OLS predictions and their root-mean-squared error
y_pred_train = lm.predict(X_train_sm)
rmse_train = np.sqrt(mean_squared_error(y_true=y_train, y_pred=y_pred_train))

# Held-out split: same computation on the test design matrix
y_pred_test = lm.predict(X_test_sm)
rmse_test = np.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred_test))

print(f'Train RMSE: {rmse_train}')
print(f'Test RMSE: {rmse_test}')


In [None]:
# Fit a GLM with a Poisson family on the same training design matrix that the
# OLS model used (intercept column included).
poisson_family = sm.families.Poisson()
poisson_glm = sm.GLM(y_train, X_train_sm, family=poisson_family)
poisson_model = poisson_glm.fit()
print(poisson_model.summary())


In [None]:
# Training split: Poisson GLM predictions and their root-mean-squared error
y_pred_train_poisson = poisson_model.predict(X_train_sm)
rmse_train_poisson = np.sqrt(
    mean_squared_error(y_true=y_train, y_pred=y_pred_train_poisson)
)

# Held-out split: same computation on the test design matrix
y_pred_test_poisson = poisson_model.predict(X_test_sm)
rmse_test_poisson = np.sqrt(
    mean_squared_error(y_true=y_test, y_pred=y_pred_test_poisson)
)

print(f'Train RMSE (Poisson): {rmse_train_poisson}')
print(f'Test RMSE (Poisson): {rmse_test_poisson}')
