In [2]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# Step 1: Check the current working directory
print("Current working directory:", os.getcwd())

# Step 2: Load and understand the dataset

file_path = 'c:/Users/hp480/OneDrive/Desktop/Road_analysis/data/road_accident.csv'

# Preview the raw file text before parsing, so stray whitespace in the
# header or malformed rows are visible.
try:
    with open(file_path, 'r') as file:
        content = file.read()
except FileNotFoundError:
    # A missing file must not abort the cell with a traceback here:
    # pd.read_csv(file_path) in the try block below raises the same error,
    # and its handler prints the user-facing "CSV file not found" message.
    content = None
else:
    print("File content:")
    print(content)

try:
    df = pd.read_csv(file_path)  # Use absolute path if necessary
    print(df.head())
    print(df.describe())

    # Print the column names exactly as they appear in the file header
    print("Column names:", df.columns.tolist())

    # Strip leading/trailing spaces from the column names so the selections
    # below still match headers that were written with stray padding.
    df.columns = df.columns.str.strip()

    # Identify the dependent and independent variables.
    # Adjust these names to your actual dataset; a missing column raises
    # KeyError, which is reported by the handler at the bottom.
    X = df[['speed', 'weather', 'road_condition', 'vehicle_type']]
    y = df['accident_severity']

    # Step 3: Create the Linear Regression Model
    # Hold out 20% of the rows for evaluation; the fixed random_state makes
    # the split repeatable between runs.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = LinearRegression()
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    # Evaluate the model on the held-out rows
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f'Mean Squared Error: {mse}')
    print(f'R^2 Score: {r2}')

    # Step 4: Save the Model for Future Use
    model_path = 'c:/Users/hp480/OneDrive/Desktop/Road_analysis/models/linear_regression_model.pkl'
    # Create the target directory first. Without it, joblib.dump raises
    # FileNotFoundError, which the handler below would misreport as a
    # missing CSV file and which would skip the remaining steps.
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    joblib.dump(model, model_path)

    # Step 5: Predict Accident Severity for a Hypothetical Set of Independent Variables

    # Hypothetical data: one row using the same column names the model was fitted on
    hypothetical_data = pd.DataFrame({
        'speed': [45],
        'weather': [1],
        'road_condition': [2],
        'vehicle_type': [1]
    })

    # Predict accident severity
    predicted_severity = model.predict(hypothetical_data)
    print(f'Predicted Accident Severity: {predicted_severity[0]}')

    # Step 6: Explain the Benefits in Underdeveloped Countries
    benefits = """
    A linear regression model for predicting road accident severity can help in underdeveloped countries by:
    1. Identifying High-Risk Factors: Understanding which factors (e.g., high speeds, poor road conditions) contribute most to severe accidents.
    2. Implementing Preventive Measures: Authorities can focus on improving road conditions, enforcing speed limits, and raising awareness about safe driving practices.
    3. Resource Allocation: Efficiently allocating resources like ambulances and traffic police to areas with high predicted accident severity.
    4. Policy Making: Assisting policymakers in creating data-driven regulations to enhance road safety.
    """
    print(benefits)

except FileNotFoundError:
    print("CSV file not found. Please check the file path and name.")
except pd.errors.EmptyDataError:
    print("CSV file is empty or not properly formatted.")
except KeyError as e:
    print(f"KeyError: {e}. Please check if the column names are correct.")


Current working directory: c:\Users\hp480\OneDrive\Desktop\Road_analysis\notebooks
File content:
speed,weather ,road_condition,vehicle_type,accident_severity
56,3,1,4,2
65,3,2,2,1
70,1,0,3,2
48,2,2,1,3
82,1,2,4,4
59,3,2,3,1
120,2,3,1,0
100,1,2,3,2
55,3,1,2,4
25,2,2,1,1
20,1,3,3,3
10,2,2,4,0

   speed  weather   road_condition  vehicle_type  accident_severity
0     56         3               1             4                  2
1     65         3               2             2                  1
2     70         1               0             3                  2
3     48         2               2             1                  3
4     82         1               2             4                  4
            speed   weather   road_condition  vehicle_type  accident_severity
count   12.000000  12.000000       12.000000     12.000000          12.000000
mean    59.166667   2.000000        1.833333      2.583333           1.916667
std     32.067637   0.852803        0.834847      1.164500       