In [None]:
import pandas as pd
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score



In [None]:
# Read the raw CSV, then narrow it to the six columns selected for this notebook.
df_t = pd.read_csv("Bird_strikes.csv")

selected_columns = [
    "MakeModel",
    "NumberStruckActual",
    "Effect",
    "Cost",
    "Altitude",
    "FlightDate",
]
# All rows, only the selected columns.
df = df_t.loc[:, selected_columns]
df



In [None]:
# Clean the selected columns: remove incomplete rows and make Cost numeric.
# Written so the cell can be re-run safely: the earlier in-place version raised
# AttributeError on a second run, because `.str` is unavailable once Cost has
# already been converted to float.
df = (
    df.dropna()  # drop every row that has a missing value in any column
    .assign(
        # Cast to str first so comma stripping also works when Cost is already
        # numeric; regex=False treats ',' as a literal character.
        Cost=lambda frame: frame['Cost']
        .astype(str)
        .str.replace(',', '', regex=False)
        .astype(float)
    )
)
df

In [None]:
# Target (Cost) as a 1-D array; single feature (Altitude) as a 2-D array.
y = df['Cost'].values
X = df[['Altitude']].values

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
# 3-nearest-neighbour regressor trained on the scaled training split;
# fit() returns the estimator itself, so construction and fitting are chained.
knn = KNeighborsRegressor(n_neighbors=3).fit(X_train, y_train)

# Predictions for the held-out rows.
y_pred = knn.predict(X_test)


In [None]:
# Score the held-out predictions with both metrics first, then report them.
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print(f'R² score: {r2:.2f}')
print(f'Mean Squared Error: {mse:.2f}')


In [None]:

# Pair each held-out actual value with its prediction for plotting.
results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})

# End points of the red reference diagonal, which spans the range of the
# actual values on both axes.
actual_lo = min(results['Actual'])
actual_hi = max(results['Actual'])

fig = px.scatter(results, x='Actual', y='Predicted', title='Actual vs Predicted Cost')
fig.add_shape(
    type='line',
    x0=actual_lo,
    y0=actual_lo,
    x1=actual_hi,
    y1=actual_hi,
    line={'color': 'Red'},
)
fig.update_layout(xaxis_title='Actual Cost', yaxis_title='Predicted Cost')

fig.show()
