In [None]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from datetime import datetime
import plotly.express as px

In [None]:
# Load the raw bird-strike records; `df_t` keeps every column from the CSV.
df_t = pd.read_csv("Bird_strikes.csv")
# Keep only the columns used by this analysis. The explicit .copy() makes `df`
# an independent frame, so the cleaning cell that drops rows and adds columns
# cannot touch (or raise chained-assignment warnings about) `df_t`.
df = df_t.loc[:, ["MakeModel", "NumberStruckActual", 'Effect', 'Cost', 'Altitude', 'FlightDate']].copy()
# Preview a handful of rows instead of rendering the whole frame.
df.head()



In [None]:
# Drop every row with a missing value in any selected column. Rebinding the
# name (rather than inplace=True) and copying keeps this cell safe to re-run.
df = df.dropna().copy()
# The ',' stripping implies Cost is read as text with thousands separators
# (e.g. "30,736"). Casting to str first means the cell also works when Cost is
# already numeric -- either because the CSV had no separators or because this
# cell has been executed before -- instead of failing on the .str accessor.
# regex=False treats ',' as a literal on every pandas version.
df['Cost'] = df['Cost'].astype(str).str.replace(',', '', regex=False).astype(float)
# Parse the flight date, then expose its calendar parts as numeric features.
df['FlightDate'] = pd.to_datetime(df['FlightDate'])
df['Year'] = df['FlightDate'].dt.year
df['Month'] = df['FlightDate'].dt.month
df['Day'] = df['FlightDate'].dt.day
# Preview the cleaned frame.
df.head()


In [None]:
# Features are the calendar parts of the flight date; the target is the cost.
X = df[['Year', 'Month', 'Day']]
y = df['Cost']

# Hold out 30% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


In [None]:
# Fit an ordinary least-squares model on the training rows
# (fit() returns the estimator itself, so the calls can be chained).
model = LinearRegression().fit(X_train, y_train)

# Predict the cost for the held-out rows.
y_pred = model.predict(X_test)


In [None]:
# Score the held-out predictions: R² and mean squared error.
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f'R² score: {r2:.2f}')
print(f'Mean Squared Error: {mse:.2f}')


In [None]:
# Build a one-row feature frame (same column names and order as the training
# features) for the date to predict.
new_date = pd.DataFrame(
    {
        'Year': [2025],
        'Month': [4],
        'Day': [20],
    }
)
# predict() returns one value per input row; there is a single row here.
new_cost_pred = model.predict(new_date)
print(f'Predicted Cost for 2025-4-20: {new_cost_pred[0]:.2f}')


In [None]:
# Collect actual and predicted cost for every held-out row, keyed by its date.
# pd.to_datetime assembles the dates from the Year/Month/Day columns in one
# vectorised call (column names are matched case-insensitively), replacing the
# row-by-row datetime(...) construction.
results = pd.DataFrame({
    'Date': pd.to_datetime(X_test[['Year', 'Month', 'Day']]),
    'Actual': y_test,
    'Predicted': y_pred,
})
# The train/test split shuffles the rows, and a line chart joins points in row
# order. Sort chronologically so the traces run left to right instead of
# zig-zagging back and forth across the time axis.
results = results.sort_values('Date')

fig = px.line(results, x='Date', y=['Actual', 'Predicted'], title='Actual vs Predicted Cost Over Time')
fig.update_layout(xaxis_title='Date', yaxis_title='Cost')

fig.show()
