### Importing the required libraries

In [93]:
import numpy as np
import pandas as pd

### Reading the CSV file and creating the dataset

In [94]:
# Load the raw monthly airline statistics.
data = pd.read_csv('air-traffic-prediction/AirtrafficA4.csv')

# 'PASSENGERS CARRIED' is handled as text here (it is accessed through `.str`);
# strip the ',' separators literally and convert the result to numbers.
# No `.dropna()` at this point: a Series with rows removed is re-aligned on the
# index when it is assigned back to the column, so the removed rows would simply
# come back as NaN. Missing values therefore stay in `data` as NaN.
data['PASSENGERS CARRIED'] = pd.to_numeric(
    data['PASSENGERS CARRIED'].str.replace(',', '', regex=False)
)

# Build a first-of-month timestamp from YEAR and the first three letters of
# MONTH (some months are spelled longer in the file, e.g. 'JUNE' / 'JULY',
# hence the slice before parsing with %b).
data["YEAR_MONTH"] = pd.to_datetime(
    data['YEAR'].astype(str) + '-' + data['MONTH'].str[:3] + '-01',
    format='%Y-%b-%d',
)

# mergesort is stable, so rows sharing a month keep their file order.
data = data.sort_values(by='YEAR_MONTH', kind='mergesort')
data

Unnamed: 0,AIRLINE,YEAR,MONTH,TOTAL DEPARTURES,HOURS FLOWN,KILOMETRE FLOWN,PASSENGERS CARRIED,AVAILABLE SEAT KILOMETRE (IN THOUSAND),FREIGHT CARRIED (IN TONNE),MAIL CARRIED (IN TONNE),YEAR_MONTH
116,A007,2013,JAN,10552,18655,10112,1408012.0,1820105,6465.00,0,2013-01-01
117,A007,2013,FEB,9873,17374,9439,1341210.0,1698930,6235.00,0,2013-02-01
118,A007,2013,MAR,11393,20093,11028,1423569.0,1984886,6505.00,0,2013-03-01
119,A007,2013,APR,11426,20084,11090,1511094.0,1996084,5903.00,0,2013-04-01
120,A007,2013,MAY,11885,20779,11533,1685168.0,2075882,7345.00,0,2013-05-01
...,...,...,...,...,...,...,...,...,...,...,...
3,A007,2023,APR,48752,84232,42615,7406440.0,8005648,19432.80,2102.9,2023-04-01
4,A007,2023,MAY,50956,87917,44505,8109626.0,8375201,24165.10,2102.4,2023-05-01
5,A007,2023,JUNE,49989,86217,43739,7893296.0,8254272,23522.60,2383.0,2023-06-01
6,A007,2023,JULY,52127,90528,45404,7674890.0,8577184,24885.80,2585.0,2023-07-01


#### Dropping the NaN values

In [95]:
# Keep only the timestamp and the target column, drop the rows whose passenger
# count is missing, and renumber the rows 0..n-1 so that later cells can address
# them by position. `reset_index()` (without drop) keeps the original row label
# in an 'index' column, exactly as the frame had before.
# Built as one chained expression instead of `inplace=True`, so the cell always
# produces a fresh frame from `data` and can be re-run safely.
passengers = (
    data[['YEAR_MONTH', 'PASSENGERS CARRIED']]
    .dropna(subset=['PASSENGERS CARRIED'])
    .reset_index()
)
passengers

Unnamed: 0,index,YEAR_MONTH,PASSENGERS CARRIED
0,116,2013-01-01,1408012.0
1,117,2013-02-01,1341210.0
2,118,2013-03-01,1423569.0
3,119,2013-04-01,1511094.0
4,120,2013-05-01,1685168.0
...,...,...,...
123,3,2023-04-01,7406440.0
124,4,2023-05-01,8109626.0
125,5,2023-06-01,7893296.0
126,6,2023-07-01,7674890.0


#### Generating a string from the time series

In [98]:
# Flatten the series into "timestamp, value, timestamp, value, ..." text for the
# prompt. Every pair (including the last one) is followed by ', ', so the string
# ends with a trailing separator.
# The two columns are walked in parallel with zip() rather than looked up by
# label for each i, so the result does not depend on the frame's index labels,
# and the pieces are joined once instead of growing a string with `+=`.
formatted_data = ''.join(
	str(month) + ', ' + str(count) + ', '
	for month, count in zip(passengers['YEAR_MONTH'], passengers['PASSENGERS CARRIED'])
)
print(formatted_data)

2013-01-01 00:00:00, 1408012.0, 2013-02-01 00:00:00, 1341210.0, 2013-03-01 00:00:00, 1423569.0, 2013-04-01 00:00:00, 1511094.0, 2013-05-01 00:00:00, 1685168.0, 2013-06-01 00:00:00, 1480879.0, 2013-07-01 00:00:00, 1445248.0, 2013-08-01 00:00:00, 1531406.0, 2013-09-01 00:00:00, 1378691.0, 2013-10-01 00:00:00, 1510184.0, 2013-11-01 00:00:00, 1467763.0, 2013-12-01 00:00:00, 1575872.0, 2014-01-01 00:00:00, 1426580.0, 2014-02-01 00:00:00, 1464070.0, 2014-03-01 00:00:00, 1601141.0, 2014-04-01 00:00:00, 1679963.0, 2014-05-01 00:00:00, 1908334.0, 2014-06-01 00:00:00, 1797101.0, 2014-07-01 00:00:00, 1599977.0, 2014-08-01 00:00:00, 1858664.0, 2014-09-01 00:00:00, 1907378.0, 2014-10-01 00:00:00, 1890273.0, 2014-11-01 00:00:00, 1967992.0, 2014-12-01 00:00:00, 2324221.0, 2015-01-01 00:00:00, 2276404.0, 2015-02-01 00:00:00, 2230645.0, 2015-03-01 00:00:00, 2286128.0, 2015-04-01 00:00:00, 2481285.0, 2015-05-01 00:00:00, 2769283.0, 2015-06-01 00:00:00, 2536554.0, 2015-07-01 00:00:00, 2416916.0, 2015-08-

#### Generating the prompt

In [103]:
# Assemble the prompt: fixed instructions (one literal per sentence, joined by
# implicit string concatenation) followed by a newline, a space and the
# flattened time series.
prompt = (
    "The data given below is a time series data for the air traffic of an airline per month. "
    "The data points are given along with their date and time separated by a comma. "
    "Use this data to predict the values of the next 12 months. "
    "There is a huge drop in the value because of the covid time in which the airline was closed. "
    "Thus take that into account and predict the next 12 values. "
    "Return the prediction in the same format as you are getting the data. "
    "Use Exponential smoothening method for predicting the values. "
    "Do not write any code or any message. "
    "Use exponential smoothing \n "
    f"{formatted_data}"
)
print(prompt)

The data given below is a time series data for the air traffic of an airline per month. The data points are given along with their date and time separated by a comma. Use this data to predict the values of the next 12 months. There is a huge drop in the value because of the covid time in which the airline was closed. Thus take that into account and predict the next 12 values. Return the prediction in the same format as you are getting the data. Use Exponential smoothening method for predicting the values. Do not write any code or any message. Use exponential smoothing 
 2013-01-01 00:00:00, 1408012.0, 2013-02-01 00:00:00, 1341210.0, 2013-03-01 00:00:00, 1423569.0, 2013-04-01 00:00:00, 1511094.0, 2013-05-01 00:00:00, 1685168.0, 2013-06-01 00:00:00, 1480879.0, 2013-07-01 00:00:00, 1445248.0, 2013-08-01 00:00:00, 1531406.0, 2013-09-01 00:00:00, 1378691.0, 2013-10-01 00:00:00, 1510184.0, 2013-11-01 00:00:00, 1467763.0, 2013-12-01 00:00:00, 1575872.0, 2014-01-01 00:00:00, 1426580.0, 2014-02

In [104]:
# Save the prompt to a file for easier usage (e.g. pasting it elsewhere).
# The encoding is given explicitly so the file content does not depend on the
# platform's default text encoding.
with open("prompt.txt", "w", encoding="utf-8") as prompt_file:
    prompt_file.write(prompt)

In [130]:
# Forecast text in the same "timestamp, value," layout the prompt asks for.
# Presumably this is the model's reply to the prompt, pasted in by hand — TODO confirm
# and record which model/date produced it.
# NOTE(review): the series displayed earlier ends at 2023-07 while this forecast
# starts at 2023-09; confirm that skipping 2023-08 is intended.
prompt_response = """
2023-09-01 00:00:00, 7729142.7,
2023-10-01 00:00:00, 7927840.3,
2023-11-01 00:00:00, 8146792.1,
2023-12-01 00:00:00, 8341287.9,
2024-01-01 00:00:00, 8182641.4,
2024-02-01 00:00:00, 7971184.5,
2024-03-01 00:00:00, 8462486.9,
2024-04-01 00:00:00, 8553750.2,
2024-05-01 00:00:00, 9248628.1,
2024-06-01 00:00:00, 8991845.9,
2024-07-01 00:00:00, 8763097.8,
2024-08-01 00:00:00, 8929482.4
"""

In [131]:
# Strip every newline and space from the pasted reply, then cut it at the commas.
# The resulting list alternates: timestamp, value, timestamp, value, ...
prompt_response = prompt_response.translate(str.maketrans('', '', '\n ')).split(',')
display(prompt_response)

# The values sit at the odd positions; int(float(...)) drops the fractional part
# (truncation, not rounding).
prompt_response_1 = [int(float(token)) for token in prompt_response[1::2]]
prompt_response = prompt_response_1
prompt_response

['2023-09-0100:00:00',
 '7729142.7',
 '2023-10-0100:00:00',
 '7927840.3',
 '2023-11-0100:00:00',
 '8146792.1',
 '2023-12-0100:00:00',
 '8341287.9',
 '2024-01-0100:00:00',
 '8182641.4',
 '2024-02-0100:00:00',
 '7971184.5',
 '2024-03-0100:00:00',
 '8462486.9',
 '2024-04-0100:00:00',
 '8553750.2',
 '2024-05-0100:00:00',
 '9248628.1',
 '2024-06-0100:00:00',
 '8991845.9',
 '2024-07-0100:00:00',
 '8763097.8',
 '2024-08-0100:00:00',
 '8929482.4']

[7729142,
 7927840,
 8146792,
 8341287,
 8182641,
 7971184,
 8462486,
 8553750,
 9248628,
 8991845,
 8763097,
 8929482]

In [132]:
def make_dataframe(forecast, start_year=2023, start_month=9):
	"""Build the submission table for a sequence of monthly forecast values.

	Parameters
	----------
	forecast : sequence of numbers
		One predicted passenger count per consecutive month.
	start_year : int, default 2023
		Calendar year of the first forecast month.
	start_month : int, default 9
		Calendar month (1-12) of the first forecast month.

	Returns
	-------
	pandas.DataFrame
		Columns 'YEAR_MONTH' (labels such as '2023 SEP') and
		'PASSENGERS CARRIED', in that order, one row per forecast value.

	Raises
	------
	ValueError
		If ``start_month`` is not in 1..12.

	With the defaults and 12 values the labels run from '2023 SEP' to
	'2024 AUG'. The labels are generated rather than hard-coded, so a forecast
	of any length gets a matching number of labels.
	"""
	if not 1 <= start_month <= 12:
		raise ValueError(f"start_month must be in 1..12, got {start_month!r}")

	# Fixed English abbreviations, so the labels do not depend on the locale.
	month_abbr = ('JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
	              'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC')

	forecast_df = pd.DataFrame(forecast, columns=['PASSENGERS CARRIED'])

	# Count months from year 0 so that rolling over December is plain integer
	# arithmetic: divmod(total, 12) -> (year, zero-based month).
	first = start_year * 12 + (start_month - 1)
	labels = []
	for offset in range(len(forecast_df)):
		year, month_idx = divmod(first + offset, 12)
		labels.append(f"{year} {month_abbr[month_idx]}")
	forecast_df['YEAR_MONTH'] = labels

	# Put the label column first.
	return forecast_df.reindex(columns=['YEAR_MONTH', 'PASSENGERS CARRIED'])

In [133]:
# Turn the parsed forecast values into the submission table, write it to disk
# without the row-number column, and display it.
submission_path = "submission_prompt.csv"
forecast_df = make_dataframe(prompt_response)
forecast_df.to_csv(submission_path, index=False)
forecast_df

Unnamed: 0,YEAR_MONTH,PASSENGERS CARRIED
0,2023 SEP,7729142
1,2023 OCT,7927840
2,2023 NOV,8146792
3,2023 DEC,8341287
4,2024 JAN,8182641
5,2024 FEB,7971184
6,2024 MAR,8462486
7,2024 APR,8553750
8,2024 MAY,9248628
9,2024 JUN,8991845
