In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import Ridge, Lasso
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from statsmodels.stats.outliers_influence import variance_inflation_factor
from sklearn.model_selection import GridSearchCV
from sklearn.impute import SimpleImputer


df = pd.read_excel('/content/accident_data (1).xlsx')


weather_conditions = {'Clear': 1, 'Cloudy': 2, 'Rain': 3, 'Snow': 4, 'Fog': 5}
df['Weather_Severity'] = df['Weather'].map(weather_conditions)

road_conditions = {'Dry': 1, 'Wet': 2, 'Icy': 3}
df['Road_Condition_Severity'] = df['Road_Condition'].map(road_conditions)

df['Rush_Hour'] = df['Time_of_Day'].apply(lambda x: 1 if 6 <= x <= 9 or 16 <= x <= 19 else 0)
df['Speed_Severity'] = df['Speed_Limit'].apply(lambda x: 1 if x > 50 else 0)
df['Traffic_Condition'] = df['Traffic_Volume'].apply(lambda x: 'Low' if x < 500 else 'High')


categorical_features = ['Weather', 'Road_Condition', 'Traffic_Condition']
preprocessor = ColumnTransformer(transformers=[('cat', OneHotEncoder(), categorical_features)], remainder='passthrough')

X = df.drop('Accident_Severity', axis=1)
y = df['Accident_Severity']

X_encoded = preprocessor.fit_transform(X)

imputer = SimpleImputer(strategy='mean')
X_encoded = imputer.fit_transform(X_encoded)

scaler = StandardScaler()
X_encoded = scaler.fit_transform(X_encoded)

X_encoded_df = pd.DataFrame(X_encoded, columns=[f"feature_{i}" for i in range(X_encoded.shape[1])])


def calculate_vif(df):
    vif = pd.DataFrame()
    vif["Feature"] = df.columns
    vif["VIF"] = [variance_inflation_factor(df.values, i) for i in range(df.shape[1])]
    return vif

vif_data = calculate_vif(X_encoded_df)
print(vif_data)

X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=0)


ridge = Ridge()
lasso = Lasso()


params = {'alpha': [0.01, 0.1, 1, 10, 100]}
ridge_cv = GridSearchCV(ridge, param_grid=params, cv=5)
lasso_cv = GridSearchCV(lasso, param_grid=params, cv=5)

ridge_cv.fit(X_train, y_train)
lasso_cv.fit(X_train, y_train)

best_alpha_ridge = ridge_cv.best_params_['alpha']
best_alpha_lasso = lasso_cv.best_params_['alpha']


y_pred_ridge = ridge_cv.predict(X_test)
y_pred_lasso = lasso_cv.predict(X_test)


def evaluate_model(y_true, y_pred):
    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)
    print(f"MAE: {mae:.4f}, MSE: {mse:.4f}, RMSE: {rmse:.4f}, R2: {r2:.4f}")


print("Ridge Model Performance:")
evaluate_model(y_test, y_pred_ridge)

print("Lasso Model Performance:")
evaluate_model(y_test, y_pred_lasso)


rush_hour_colors = df.loc[y_test.index, 'Rush_Hour'].map({1: 'red', 0: 'blue'})

fig = px.scatter(x=y_test, y=y_pred_ridge, color=rush_hour_colors,
                 labels={'x': 'Actual Severity', 'y': 'Predicted Severity'},
                 title="Actual vs Predicted Severity (Ridge Model - Interactive)")
fig.show()


fig = px.scatter(x=y_test, y=y_pred_lasso, color=rush_hour_colors,
                 labels={'x': 'Actual Severity', 'y': 'Predicted Severity'},
                 title="Actual vs Predicted Severity (Lasso Model - Interactive)")
pie_data = df.groupby(['Time_of_Day', 'Weather', 'Road_Condition', 'Speed_Limit', 'Traffic_Volume']).size().reset_index(name='counts')
fig_pie = px.pie(pie_data, values='counts', names='Time_of_Day',
                 title="Accidents Breakdown by Time of Day, Weather, Road Condition, Speed Limit, Traffic Volume",
                 color_discrete_sequence=px.colors.sequential.Rainbow)
fig_pie.show()

df['Date'] = pd.to_datetime(df['Date'])
df['Date'] = df['Date'].dt.strftime('%Y-%m-%d')
weather_data['Date'] = pd.to_datetime(weather_data['Date'])


weather_data['Date'] = weather_data['Date'].dt.strftime('%Y-%m-%d')
weather_data = pd.read_excel('/content/weather_data.xlsx')
traffic_data = pd.read_excel('/content/traffic_data.xlsx')


merged_df = pd.merge(df, weather_data[['Date', 'Location', 'Temperature', 'Precipitation']],
                     on=['Date', 'Location'], how='left')
merged_df = pd.merge(merged_df, traffic_data[['Date', 'Location', 'Traffic_Volume']],
                     on=['Date', 'Location'], how='left')



divide by zero encountered in scalar divide



       Feature       VIF
0    feature_0       inf
1    feature_1       inf
2    feature_2       inf
3    feature_3       inf
4    feature_4       inf
5    feature_5       inf
6    feature_6       inf
7    feature_7       inf
8    feature_8       inf
9    feature_9       inf
10  feature_10  1.068328
11  feature_11  4.553748
12  feature_12  4.232079
13  feature_13       inf
14  feature_14       inf
15  feature_15  1.085554
16  feature_16  4.674188
Ridge Model Performance:
MAE: 0.8987, MSE: 1.0559, RMSE: 1.0276, R2: -0.1144
Lasso Model Performance:
MAE: 0.9050, MSE: 1.0881, RMSE: 1.0431, R2: -0.1484


KeyError: 'Date'

In [2]:
import pandas as pd
import numpy as np

# Number of data points to generate
num_data_points = 100

# Generate random data for the features
# Adjust the ranges and data types as needed for your specific use case
data = {
    'Time_of_Day': np.random.randint(0, 24, num_data_points),
    'Weather': np.random.choice(['Sunny', 'Rainy', 'Cloudy', 'Foggy'], num_data_points),
    'Road_Condition': np.random.choice(['Dry', 'Wet', 'Icy'], num_data_points),
    'Speed_Limit': np.random.choice([30, 40, 50, 60, 70], num_data_points),
    'Traffic_Volume': np.random.randint(100, 1000, num_data_points),
    # ... add other features as needed
    'Accident_Severity': np.random.choice([1, 2, 3], num_data_points)  # Example: 1-Minor, 2-Moderate, 3-Severe
}

# Create a Pandas DataFrame
df = pd.DataFrame(data)

# Save the DataFrame to an Excel file
df.to_excel('accident_data.xlsx', index=False)

print("Data generated and saved to 'accident_data.xlsx'")

Data generated and saved to 'accident_data.xlsx'
