In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [9]:
data = pd.read_csv('air-traffic-prediction/AirtrafficA4.csv')
# Clean and index in one chain: strip thousands separators from the passenger
# counts, build a first-of-month timestamp from YEAR + the first three letters
# of MONTH, then index and (stably) sort by that timestamp.
data = (
    data.assign(**{
        'PASSENGERS CARRIED': pd.to_numeric(data['PASSENGERS CARRIED'].str.replace(',', '')),
        'YEAR_MONTH': pd.to_datetime(
            data['YEAR'].astype(str) + '-' + data['MONTH'].str[:3] + '-01',
            format='%Y-%b-%d',
        ),
    })
    .set_index("YEAR_MONTH")
    .sort_values(by='YEAR_MONTH', kind='mergesort')
)
# data.plot(y='PASSENGERS CARRIED')
data

In [10]:
# Passenger counts with the missing months removed.
passengers = data.loc[:, 'PASSENGERS CARRIED'].dropna()
passengers

In [11]:
# Spell each count out digit by digit ("1 2 3") and separate the counts with ", ".
digit_spaced_counts = [" ".join(str(int(count))) for count in passengers]
formatted_data = ", ".join(digit_spaced_counts)
formatted_data

In [12]:
# Prompt text for a language model: embeds the digit-spaced, comma-separated
# history from `formatted_data` and asks for the next 12 values.
# The sentence previously had a missing word ("each  is separated") and a
# misplaced full stop (" .Predict"); both are repaired here.
prompt = (
    "Given the following data for air passengers by month, "
    f"where each value is separated by a comma: {formatted_data}. "
    "Predict the next 12 values yourself without giving me the code for it."
)
print(prompt)

# response = openai.Completion.create(
# 	engine = "",
# 	prompt = prompt,
# 	temperature = 0,
# 	max_tokens = 256,
# 	top_p = 1,
# 	frequency_penalty = 0,
# 	presence_penalty = 0
# )




In [13]:
# save the prompt in a file
# The encoding is given explicitly so the written bytes do not depend on the
# platform's default text encoding.
with open("prompt.txt", "w", encoding="utf-8") as prompt_file:
    prompt_file.write(prompt)

In [14]:
from statsforecast.adapters.prophet import AutoARIMAProphet
# from prophet import Prophet

In [15]:
# model = Prophet()
model = AutoARIMAProphet()

# Work on a fresh copy of the frame that additionally exposes the timestamp
# index as 'ds' and the passenger counts as 'y' before handing it to fit().
data = data.assign(ds=data.index, y=data['PASSENGERS CARRIED'])
model.fit(data)

In [16]:
# Ask the fitted model for a frame extended by 12 month-start ('MS') periods.
future = model.make_future_dataframe(
    freq='MS',
    periods=12,
)
future.tail(20)

In [17]:
# Predict over every row of `future`, then show the last 12 rows.
forecast = model.predict(future)
forecast.iloc[-12:]

In [18]:
# Draw the forecast with the model's own plot helper (legend enabled); the
# object it returns is kept in `fig`.
fig = model.plot(forecast, include_legend=True)

In [19]:
# Keep only the 'yhat' values of the final 12 rows, as a standalone NumPy array.
# NOTE(review): this rebinds `forecast` from the prediction frame to an array,
# so the cell cannot be re-run without first re-running the predict cell.
forecast = forecast['yhat'].tail(12).to_numpy(copy=True)

In [20]:
def make_dataframe(forecast, start='2023-09-01'):
	"""Build the submission table for a run of consecutive monthly forecasts.

	NOTE(review): make_dataframe is defined again in a later cell of this
	notebook; keeping a single definition would stop the two drifting apart.

	Parameters
	----------
	forecast : array-like
		Forecast values in chronological order, one value per month.
	start : str or datetime-like, default '2023-09-01'
		Any date inside the first forecast month. The default reproduces the
		labels '2023 SEP' ... '2024 AUG' for a 12-value forecast.

	Returns
	-------
	pandas.DataFrame
		Columns 'YEAR_MONTH' (labels such as '2023 SEP') and
		'PASSENGERS CARRIED', in that order.
	"""
	forecast_df = pd.DataFrame(forecast, columns=['PASSENGERS CARRIED'])
	# Fixed abbreviations keep the labels independent of the process locale.
	month_abbr = ('JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC')
	# One label per forecast row, so horizons other than 12 months also work.
	months = pd.period_range(start=start, periods=len(forecast_df), freq='M')
	forecast_df['YEAR_MONTH'] = [f'{month.year} {month_abbr[month.month - 1]}' for month in months]
	forecast_df = forecast_df.reindex(columns = ['YEAR_MONTH', 'PASSENGERS CARRIED'])
	return forecast_df

In [21]:
# Tabulate the AutoARIMAProphet forecast and write it out as the first submission.
forecast_df = make_dataframe(forecast)
forecast_df.to_csv(path_or_buf='submission_1.csv', index=False)
forecast_df

In [22]:
# Preview the frame, which at this point also carries the 'ds' and 'y' columns
# added for model fitting.
data.head()

In [23]:
def create_time_feature(df):
    """Return a copy of ``df`` extended with calendar features from its ``ds`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a datetime-like ``ds`` column (it is read through ``.dt``).

    Returns
    -------
    pandas.DataFrame
        A new frame with every column of ``df`` plus ``dayofmonth``,
        ``dayofweek``, ``dayofyear``, ``month`` and ``year``. The frame passed
        in is not modified.
    """
    stamps = df['ds'].dt
    # assign() builds a new frame, so the caller's frame is left unmodified
    # instead of silently gaining five columns.
    return df.assign(
        dayofmonth=stamps.day,
        dayofweek=stamps.dayofweek,
        dayofyear=stamps.dayofyear,
        month=stamps.month,
        year=stamps.year,
    )

In [24]:
# Derive the calendar features (day/month/year parts of 'ds') that the
# regressor will be trained on.
xgb_data = create_time_feature(data)

In [25]:
# Re-inspect the source frame after the feature-creation step.
data.head()

In [26]:
# Narrow the frame to the calendar features plus the target 'y', and discard
# any row that still contains a missing value.
xgb_data = (
    xgb_data
    .loc[:, ['dayofmonth', 'dayofweek', 'dayofyear', 'month', 'year', 'y']]
    .dropna()
)
xgb_data.head()

In [27]:
# Separate the target vector from the feature matrix.
xgb_y_train = xgb_data.loc[:, 'y']
xgb_x_train = xgb_data.drop(columns=['y'])

In [28]:
# Display the training feature matrix (every xgb_data column except 'y').
xgb_x_train

In [29]:
# Display the training target (the 'y' column of xgb_data).
xgb_y_train

In [30]:
import xgboost as xgb
import lightgbm as lgb
from sklearn.ensemble import RandomForestRegressor
# Active model: XGBoost regressor configured with n_estimators=2000. The
# commented lines are alternative regressors that can be swapped in instead.
reg = xgb.XGBRegressor(n_estimators=2000)
# reg = lgb.LGBMRegressor(n_estimators=2000)
# reg = RandomForestRegressor(n_estimators=5000)


In [31]:

# Train the regressor on the calendar features against the 'y' target.
reg.fit(xgb_x_train, xgb_y_train)

In [32]:
# Take the frame produced by the fitted model for a 12-period month-start
# extension, derive the calendar features from it, and keep only those
# feature columns (same names and order as the training features).
xgb_future = create_time_feature(model.make_future_dataframe(periods=12, freq='MS'))
xgb_future = xgb_future.loc[:, ['dayofmonth', 'dayofweek', 'dayofyear', 'month', 'year']]
xgb_future


In [33]:
# Predict with the trained regressor for every row of xgb_future (no plot is
# drawn in this cell); the trailing expression shows the shape of the result.
fitted = reg.predict(xgb_future)
fitted.shape

In [34]:
# Display the raw predictions returned by reg.predict.
fitted

In [35]:
# add the ds column to xgb_future
# The timestamps are taken from the same model call that produced the feature
# rows, rather than from a hard-coded start date and row count, which would
# raise on a length mismatch or silently misalign if the dataset changed.
# .to_numpy() makes the assignment positional (no index alignment).
# NOTE(review): once 'ds' is added, xgb_future no longer matches the training
# feature columns, so the predict cell must not be re-run after this one.
xgb_future['ds'] = model.make_future_dataframe(periods=12, freq='MS')['ds'].to_numpy()
xgb_future

In [36]:
# plot the fitted values on the original data
# Explicit figure/axes with labels, a title and a legend, so the red and black
# lines can be told apart without reading the code.
fit_fig, fit_ax = plt.subplots()
fit_ax.plot(xgb_future['ds'], fitted, color='red', label='Regressor prediction')
fit_ax.plot(data['ds'], data['y'], color='black', label='Observed')
fit_ax.set_xlabel('Month')
fit_ax.set_ylabel('Passengers carried')
fit_ax.set_title('Regressor prediction vs. observed passengers')
fit_ax.legend()

plt.show()

In [37]:
# NOTE(review): make_dataframe is also defined in an earlier cell of this
# notebook; keeping a single definition would stop the two drifting apart.
def make_dataframe(forecast, start='2023-09-01'):
	"""Build the submission table for a run of consecutive monthly forecasts.

	Parameters
	----------
	forecast : array-like
		Forecast values in chronological order, one value per month.
	start : str or datetime-like, default '2023-09-01'
		Any date inside the first forecast month. The default reproduces the
		labels '2023 SEP' ... '2024 AUG' for a 12-value forecast.

	Returns
	-------
	pandas.DataFrame
		Columns 'YEAR_MONTH' (labels such as '2023 SEP') and
		'PASSENGERS CARRIED', in that order.
	"""
	forecast_df = pd.DataFrame(forecast, columns=['PASSENGERS CARRIED'])
	# Fixed abbreviations keep the labels independent of the process locale.
	month_abbr = ('JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC')
	# One label per forecast row, so horizons other than 12 months also work.
	months = pd.period_range(start=start, periods=len(forecast_df), freq='M')
	forecast_df['YEAR_MONTH'] = [f'{month.year} {month_abbr[month.month - 1]}' for month in months]
	forecast_df = forecast_df.reindex(columns = ['YEAR_MONTH', 'PASSENGERS CARRIED'])
	return forecast_df

In [38]:
# Display the regressor's predictions again before the final 12 are exported.
fitted

In [39]:
# Tabulate the final 12 predictions of the regressor and write them out as the
# second submission file.
forecast_df = make_dataframe(fitted[-12:])
forecast_df.to_csv(path_or_buf='submission_2.csv', index=False)
forecast_df