# New section

In [114]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [115]:
# Copy the notebook from Google Drive into /content/flight-delay-prediction/.
# NOTE(review): the recorded output shows this failed with "No such file or
# directory" for the Drive path — confirm where the notebook actually lives.
!cp /content/drive/My\ Drive/Flight_delay.ipynb /content/flight-delay-prediction/


cp: cannot stat '/content/drive/My Drive/Flight_delay.ipynb': No such file or directory


In [None]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

import pandas as pd
import numpy as np


In [None]:
# Read the flight records CSV (path relative to the working directory) into a DataFrame.
df = pd.read_csv(filepath_or_buffer='Generated_Flight_Data.csv')

In [None]:
# One-hot encode the categorical columns so that every feature is numeric.
# Each distinct value of a column becomes its own 0/1 indicator column.
categorical_columns = ['Carrier', 'Origin', 'Destination', 'Weather']

# sparse_output=False makes the encoder return a dense NumPy array.
encoder = OneHotEncoder(sparse_output=False)

# Fit and transform exactly once: the encoder was previously re-fitted on the
# same data a second time, which only repeated the work and produced the
# identical array.
encoded_columns = encoder.fit_transform(df[categorical_columns])

# Label the encoded array with the feature names generated by the encoder.
encoded_df = pd.DataFrame(encoded_columns, columns=encoder.get_feature_names_out(categorical_columns))

In [None]:
# Wrap the encoded array in a DataFrame whose columns carry the encoder's generated feature names.
encoded_feature_names = encoder.get_feature_names_out(categorical_columns)
encoded_categorical_df = pd.DataFrame(data=encoded_columns, columns=encoded_feature_names)

In [None]:
# Keep every column of the loaded data except the raw categorical ones.
df_numeric = df.drop(labels=categorical_columns, axis='columns')

In [None]:
# Place the remaining original columns and the one-hot columns side by side (rows are aligned on the index).
frames_to_join = [df_numeric, encoded_categorical_df]
df_processed = pd.concat(frames_to_join, axis='columns')

In [None]:

# The target (y) is the 'Delay' column; the features (X) are all remaining columns.
y = df_processed['Delay']
X = df_processed.drop(columns=['Delay'])

In [None]:

# Two-stage split: hold out 30% of the rows, then halve that hold-out into
# validation and test sets (70/15/15 overall). Both calls use a fixed seed.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, random_state=42, test_size=0.3
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, random_state=42, test_size=0.5
)


In [None]:
# Show the feature column names of the training split.
print(X_train.columns)


Index(['FlightNumber', 'ScheduledDepTime', 'ActualDepTime', 'Carrier_AA',
       'Carrier_BA', 'Carrier_DL', 'Carrier_SW', 'Carrier_UA', 'Origin_ATL',
       'Origin_JFK', 'Origin_LAX', 'Origin_MIA', 'Origin_MSP',
       'Destination_ATL', 'Destination_DEN', 'Destination_LAX',
       'Destination_ORD', 'Destination_SFO', 'Weather_Clear', 'Weather_Fog',
       'Weather_Rain', 'Weather_Snow', 'Weather_Storm'],
      dtype='object')


In [None]:
# List the training columns still stored with dtype object; these need
# converting before the data can be cast to floats.
non_numeric_columns = (
    X_train
    .select_dtypes(include='object')
    .columns
)
print(non_numeric_columns)


Index(['ScheduledDepTime', 'ActualDepTime'], dtype='object')


In [None]:
# The departure-time columns are object-typed 'HH:MM' strings; re-express each
# one as the number of minutes since midnight so it becomes numeric.
TIME_COLUMNS = ['ScheduledDepTime', 'ActualDepTime']


def _minutes_since_midnight(times):
    """Convert a Series of 'HH:MM' strings to minutes elapsed since 00:00."""
    parsed = pd.to_datetime(times, format='%H:%M')
    # Vectorised .dt accessors replace the per-row lambda.
    return parsed.dt.hour * 60 + parsed.dt.minute


def _encode_time_columns(features):
    """Return a new frame in which every TIME_COLUMNS entry is in minutes.

    Column order is unchanged because assign() overwrites existing columns
    in place within the new frame.
    """
    return features.assign(
        **{column: _minutes_since_midnight(features[column]) for column in TIME_COLUMNS}
    )


# assign() builds new frames, so the frames returned by train_test_split are
# never written to in place (which can trigger SettingWithCopyWarning).
X_train = _encode_time_columns(X_train)
X_val = _encode_time_columns(X_val)
X_test = _encode_time_columns(X_test)


In [None]:
# Convert every feature and target split into a float32 NumPy array before
# it is handed to the model.
X_train, X_val, X_test = (
    features.to_numpy().astype('float32') for features in (X_train, X_val, X_test)
)
y_train, y_val, y_test = (
    target.to_numpy().astype('float32') for target in (y_train, y_val, y_test)
)

In [None]:
# Sanity check: report the dimensions of the training and validation arrays.
for label, split in (
    ("X_train shape:", X_train),
    ("y_train shape:", y_train),
    ("X_val shape:", X_val),
    ("y_val shape:", y_val),
):
    print(label, split.shape)

X_train shape: (70, 23)
y_train shape: (70,)
X_val shape: (15, 23)
y_val shape: (15,)


In [None]:
from tensorflow.keras.optimizers import Adam

# Size the input from the data instead of hard-coding 23, so the network keeps
# matching X_train if the one-hot encoding yields a different column count.
n_features = X_train.shape[1]

# Fully connected regression network: ReLU hidden layers of width
# 128 -> 64 -> 32 -> 32 -> 32 feeding a single linear output unit.
# Built from the `tf`, `Sequential` and `Dense` names imported by the
# notebook's import cell; the previous `layers.` prefix was never imported and
# raised NameError on a fresh kernel.
model = Sequential([
    tf.keras.Input(shape=(n_features,)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(32, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='linear'),
])

# Train on mean squared error and additionally report mean absolute error.
optimizer = Adam(learning_rate=0.0001)
model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])
model.summary()




In [None]:
# Confirm the training arrays' dimensions and element types before fitting.
for label, value in (
    ("X_train shape:", X_train.shape),
    ("y_train shape:", y_train.shape),
    ("X_train type:", X_train.dtype),
    ("y_train type:", y_train.dtype),
):
    print(label, value)


X_train shape: (70, 23)
y_train shape: (70,)
X_train type: float32
y_train type: float32


In [None]:
# Step 3: fit the model for 100 epochs in batches of 32, passing the
# validation split so validation loss/MAE are reported alongside training.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=100,
    validation_data=(X_val, y_val),
)

Epoch 1/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 122ms/step - loss: 4359.1362 - mae: 54.3712 - val_loss: 4503.5381 - val_mae: 62.9004
Epoch 2/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - loss: 3533.7129 - mae: 49.1193 - val_loss: 3376.2163 - val_mae: 52.6517
Epoch 3/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - loss: 2751.2236 - mae: 42.9325 - val_loss: 2497.5742 - val_mae: 42.7860
Epoch 4/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - loss: 2431.7991 - mae: 40.9983 - val_loss: 1859.0397 - val_mae: 34.3909
Epoch 5/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - loss: 2301.0388 - mae: 40.6699 - val_loss: 1471.9246 - val_mae: 27.5036
Epoch 6/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step - loss: 2258.7563 - mae: 40.6372 - val_loss: 1254.5715 - val_mae: 25.5787
Epoch 7/100
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[

In [None]:
# Step 4: score the trained model on the held-out test split.
# The square root of the MSE is printed as well, which puts the error back
# in the same units as the 'Delay' target.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"Mean Squared Error: {mse}")
print(f"Time delay error of around: {np.sqrt(mse)}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
Mean Squared Error: 956.9718017578125
Time delay error of around: 30.934961318969727


In [None]:
# Install git via apt (the recorded output reports it was already the newest version).
!apt-get install git

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
git is already the newest version (1:2.34.1-1ubuntu1.11).
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.


In [None]:
# Clone the project repository; git creates a 'flight-delay-prediction' directory for it.
!git clone https://github.com/JuneshG/flight-delay-prediction.git

Cloning into 'flight-delay-prediction'...


mv: cannot stat '/content/flight-delay-prediction/Generated_Flight_Data.csv': No such file or directory


In [None]:
# Move the two flight-data CSV files from /content into /content/flight-delay-prediction/.
!mv /content/Generated_Flight_Data.csv /content/flight-delay-prediction/
!mv /content/Processed_Flight_Data.csv /content/flight-delay-prediction/


In [None]:
# Move the notebook file into /content/flight-delay-prediction/.
# NOTE(review): the recorded output shows "No such file or directory" for
# /content/Flight_delay.ipynb — the notebook was not at that path.
!mv /content/Flight_delay.ipynb /content/flight-delay-prediction/


mv: cannot stat '/content/Flight_delay.ipynb': No such file or directory


In [None]:
# List the contents of /content.
!ls /content


flight-delay-prediction  sample_data


In [None]:
# Search /content recursively for notebook (.ipynb) files.
!find /content -name "*.ipynb"
