In [9]:
# Importing necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# ---------------------------------------------------------------------------
# Flight-delay regression: read the CSV, fit a linear regression on a fixed
# set of columns, then report hold-out metrics, coefficients and predictions.
# ---------------------------------------------------------------------------

# Settings a reader may want to tune, gathered in one place.
# The path is reproduced exactly as stored (note the space before ".csv").
DATA_PATH = '/content/fight delay prediction .csv'
FEATURE_COLUMNS = [
    'departure_hour',
    'distance',
    'passenger_count',
    'weather_delay',
    'carrier_delay',
    'security_delay',
    'late_aircraft_delay',
    'nas_delay',
]
TARGET_COLUMN = 'flight_delay'
TEST_FRACTION = 0.2  # share of rows held out for evaluation
SEED = 42            # fixed seed so the split is repeatable

# Read the raw table from disk.
data = pd.read_csv(DATA_PATH)

# Separate predictors from the value being predicted.
# NOTE(review): several predictors are named like delay components themselves;
# confirm they would be known at prediction time and do not leak the target.
X = data[FEATURE_COLUMNS]
y = data[TARGET_COLUMN]

# Hold out part of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_FRACTION,
    random_state=SEED,
)

# Fit on the training rows only.
model = LinearRegression()
model.fit(X_train, y_train)

# Score the held-out rows.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r_squared = r2_score(y_test, y_pred)

# Report the metrics.
print("Mean Squared Error:", mse)
print("R-squared Score:", r_squared)

# Report one fitted weight per predictor, in the order the columns were given.
print("\nModel Coefficients:")
for column_name, weight in zip(FEATURE_COLUMNS, model.coef_):
    print(column_name, ':', weight)

# Side-by-side table of true and predicted values for the held-out rows;
# the row labels are the original dataset indices carried over by y_test.
print("\nPredictions vs Actuals:")
results = pd.DataFrame(
    {
        'Actual': y_test,
        'Predicted': y_pred,
    }
)
print(results)


Mean Squared Error: 39.26633483237856
R-squared Score: 0.5307194129788904

Model Coefficients:
departure_hour : -0.3895579622795973
distance : -0.0038138584090992855
passenger_count : 0.1250609281307409
weather_delay : -0.0638571211094418
carrier_delay : 0.4050819965984933
security_delay : -1.6026857623824387
late_aircraft_delay : 0.1652161893000226
nas_delay : 0.3946038697503968

Predictions vs Actuals:
    Actual  Predicted
29      25  29.802421
15      40  40.401928
24      20  19.710541
17      25  10.476091
8       10   8.186508
9       35  31.667374
30      30  24.879776
