In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report, accuracy_score, roc_auc_score
from sklearn.neural_network import MLPClassifier
from sklearn.feature_selection import SelectKBest, mutual_info_classif
from imblearn.over_sampling import SMOTE
import joblib

# Load dataset
data = pd.read_csv(r"C:\Users\sarita\Downloads\updated110_mergefiles.csv")

# Preprocessing
columns_to_use = ['city', 'lon', 'lat', 'weather_main', 'weather_description', 'temp', 
                  'feels_like', 'temp_min', 'temp_max', 'pressure', 'humidity', 'visibility', 
                  'wind_speed', 'wind_deg', 'rain_1h', 'clouds_all', 'sunrise', 'sunset']

# Define severity levels based on thresholds
def calculate_severity(row):
    if row['visibility'] >= 8000 and row['wind_speed'] <= 5 and row['temp'] >= 15:
        return 0  # No severity
    elif row['visibility'] >= 5000 and row['wind_speed'] <= 10:
        return 1  # Low severity
    elif row['visibility'] >= 2000 and row['wind_speed'] <= 15:
        return 2  # Medium severity
    else:
        return 3  # High severity

# Calculate severity
X = data[columns_to_use]
data['severity'] = X.apply(calculate_severity, axis=1)

# Target column
target_column = 'severity'

y = data[target_column]

# Handle missing values
imputer = SimpleImputer(strategy='most_frequent')
X = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)
y = pd.Series(imputer.fit_transform(y.values.reshape(-1, 1)).ravel(), name=y.name)

# Encode categorical features
categorical_cols = ['city', 'weather_main', 'weather_description']
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    X[col] = le.fit_transform(X[col])
    label_encoders[col] = le

# Scale numerical features
numerical_cols = ['lon', 'lat', 'temp', 'feels_like', 'temp_min', 'temp_max', 'pressure', 'humidity', 
                  'visibility', 'wind_speed', 'wind_deg', 'rain_1h', 'clouds_all', 'sunrise', 'sunset']
scaler = StandardScaler()
X[numerical_cols] = scaler.fit_transform(X[numerical_cols])

# Feature Selection
selector = SelectKBest(score_func=mutual_info_classif, k=10)
X_selected = selector.fit_transform(X, y)
selected_features = X.columns[selector.get_support()]
print("Selected Features:", selected_features)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)

# Handle class imbalance with SMOTE
smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train, y_train)

# Random Forest Pipeline
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)
print("Random Forest Classifier:")
print("Accuracy:", accuracy_score(y_test, y_pred_rf))
print("Classification Report:\n", classification_report(y_test, y_pred_rf))

# Gradient Boosting Pipeline
gb_model = GradientBoostingClassifier(random_state=42)
gb_model.fit(X_train, y_train)
y_pred_gb = gb_model.predict(X_test)
print("\nGradient Boosting Classifier:")
print("Accuracy:", accuracy_score(y_test, y_pred_gb))
print("Classification Report:\n", classification_report(y_test, y_pred_gb))

# Neural Network Pipeline
nn_model = MLPClassifier(random_state=42, hidden_layer_sizes=(64, 32), max_iter=300)
nn_model.fit(X_train, y_train)
y_pred_nn = nn_model.predict(X_test)
print("\nNeural Network Classifier:")
print("Accuracy:", accuracy_score(y_test, y_pred_nn))
print("Classification Report:\n", classification_report(y_test, y_pred_nn))

# Merge Models and Save for Reuse
def save_combined_models(models, file_path):
    combined = {name: model for name, model in models.items()}
    joblib.dump(combined, file_path)

models = {
    "RandomForest": rf_model,
    "GradientBoosting": gb_model,
    "NeuralNetwork": nn_model
}
save_combined_models(models, r"C:\Users\sarita\Downloads\combined_models.pkl")
joblib.dump(scaler, r"C:\Users\sarita\Downloads\scaler.pkl")
joblib.dump(label_encoders, r"C:\Users\sarita\Downloads\label_encoders.pkl")



Selected Features: Index(['city', 'lon', 'lat', 'temp', 'feels_like', 'temp_min', 'temp_max',
       'visibility', 'sunrise', 'sunset'],
      dtype='object')
Random Forest Classifier:
Accuracy: 0.9998840105826535
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     16946
           1       1.00      1.00      1.00    138180
           2       1.00      1.00      1.00     15678
           3       1.00      1.00      1.00     10247

    accuracy                           1.00    181051
   macro avg       1.00      1.00      1.00    181051
weighted avg       1.00      1.00      1.00    181051


Gradient Boosting Classifier:
Accuracy: 0.9340241147521969
Classification Report:
               precision    recall  f1-score   support

           0       0.64      1.00      0.78     16946
           1       1.00      0.92      0.96    138180
           2       0.96      0.97      0.97     15678
           3       0.87   

['C:\\Users\\sarita\\Downloads\\label_encoders.pkl']

In [4]:
import streamlit as st
import pandas as pd
import numpy as np
import joblib

# Load saved models and preprocessing artifacts
models = joblib.load(r"C:\Users\sarita\Downloads\combined_models.pkl")
scaler = joblib.load(r"C:\Users\sarita\Downloads\scaler.pkl")
label_encoders = joblib.load(r"C:\Users\sarita\Downloads\label_encoders.pkl")

# Define input features
categorical_cols = ['city', 'weather_main', 'weather_description']
numerical_cols = ['lon', 'lat', 'temp', 'feels_like', 'temp_min', 'temp_max',
                  'pressure', 'humidity', 'visibility', 'wind_speed', 'wind_deg',
                  'rain_1h', 'clouds_all', 'sunrise', 'sunset']

# Streamlit UI
st.title("Traffic Accident Severity Prediction")
st.write("Input the features below to predict the severity level:")

# Input fields for categorical variables
user_input = {}
st.subheader("Categorical Features")
for col in categorical_cols:
    categories = label_encoders[col].classes_
    user_input[col] = st.selectbox(f"{col}:", categories)

# Input fields for numerical variables
st.subheader("Numerical Features")
for col in numerical_cols:
    min_val, max_val = 0, 100  # Adjust based on data
    user_input[col] = st.slider(f"{col}:", min_val, max_val, step=1)

# Preprocess user inputs
input_data = pd.DataFrame([user_input])
for col in categorical_cols:
    input_data[col] = label_encoders[col].transform(input_data[col])
input_data[numerical_cols] = scaler.transform(input_data[numerical_cols])

# Select model for prediction
st.subheader("Choose Model")
model_choice = st.selectbox("Select a model:", ["RandomForest", "GradientBoosting", "NeuralNetwork"])
selected_model = models[model_choice]

# Predict and display results
if st.button("Predict Severity"):
    severity_prediction = selected_model.predict(input_data)
    severity_level = int(severity_prediction[0])
    st.write(f"*Predicted Severity Level:* {severity_level}")

    # Add a visualization for severity probabilities (if applicable)
    if hasattr(selected_model, "predict_proba"):
        probabilities = selected_model.predict_proba(input_data)[0]
        st.write("*Probability Distribution:*")
        severity_classes = ["No Severity (0)", "Low Severity (1)", "Medium Severity (2)", "High Severity (3)"]
        st.bar_chart(pd.DataFrame({"Severity": severity_classes, "Probability": probabilities}))
        # Save the code to a Python file
file_path = r"C:\Users\sarita\Downloads /traffic_severity_app.py"
with open(r"C:\Users\sarita\Downloads /traffic_severity_app.py", "w") as file:
    file.write(streamlit_app_code)

r"C:\Users\sarita\Downloads /traffic_severity_app.py"




FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\sarita\\Downloads /traffic_severity_app.py'