In [3]:
import pandas as pd

## Preprocess data

In [4]:
#import data and read file
us_import_price = pd.read_csv('Price_of_U.S._Natural_Gas_Imports.csv', skiprows=4)
us_import_price.head()

Unnamed: 0,Month,Price of U.S. Natural Gas Imports Dollars per Thousand Cubic Feet
0,Feb 2022,5.62
1,Jan 2022,6.87
2,Dec 2021,4.74
3,Nov 2021,5.18
4,Oct 2021,4.79


In [5]:
us_import_price.isnull().sum()

Month                                                                 0
Price of U.S. Natural Gas Imports  Dollars per Thousand Cubic Feet    0
dtype: int64

In [6]:
us_import_price.dtypes

Month                                                                  object
Price of U.S. Natural Gas Imports  Dollars per Thousand Cubic Feet    float64
dtype: object

In [7]:
#rename columns
import_price = us_import_price.rename(columns={
   'Month':'Period' , 'Price of U.S. Natural Gas Imports  Dollars per Thousand Cubic Feet': 'Import_price($/Mcf)'
})

import_price.head()

Unnamed: 0,Period,Import_price($/Mcf)
0,Feb 2022,5.62
1,Jan 2022,6.87
2,Dec 2021,4.74
3,Nov 2021,5.18
4,Oct 2021,4.79


### Clean gas export price

In [8]:
export_price = pd.read_csv('Price_of_U.S._Natural_Gas_exports.csv', skiprows=4)
export_price.head()

Unnamed: 0,Month,Price of U.S. Natural Gas Exports Dollars per Thousand Cubic Feet
0,Feb 2022,8.22
1,Jan 2022,7.04
2,Dec 2021,7.4
3,Nov 2021,8.1
4,Oct 2021,7.97


In [9]:
export_price.columns

Index(['Month', 'Price of U.S. Natural Gas Exports  Dollars per Thousand Cubic Feet'], dtype='object')

In [10]:
export_price.isnull().sum()

Month                                                                 0
Price of U.S. Natural Gas Exports  Dollars per Thousand Cubic Feet    0
dtype: int64

In [11]:
#rename columns
export_price = export_price.rename(columns={
   'Month':'Period' , 'Price of U.S. Natural Gas Exports  Dollars per Thousand Cubic Feet': 'Export_price($/Mcf)'
})

export_price.head()

Unnamed: 0,Period,Export_price($/Mcf)
0,Feb 2022,8.22
1,Jan 2022,7.04
2,Dec 2021,7.4
3,Nov 2021,8.1
4,Oct 2021,7.97


## Merge dataframe Import_export_price

In [12]:
import_export_price = import_price.merge(export_price, how='left', on='Period')
import_export_price.head()

Unnamed: 0,Period,Import_price($/Mcf),Export_price($/Mcf)
0,Feb 2022,5.62,8.22
1,Jan 2022,6.87,7.04
2,Dec 2021,4.74,7.4
3,Nov 2021,5.18,8.1
4,Oct 2021,4.79,7.97


In [13]:
#convert data type
import_export_price['Period'] = pd.to_datetime(import_export_price['Period'])
import_export_price.head()

Unnamed: 0,Period,Import_price($/Mcf),Export_price($/Mcf)
0,2022-02-01,5.62,8.22
1,2022-01-01,6.87,7.04
2,2021-12-01,4.74,7.4
3,2021-11-01,5.18,8.1
4,2021-10-01,4.79,7.97


In [14]:
#extract year and month from dataframe
import_export_price['Year'] = import_export_price['Period'].dt.year
import_export_price['Month'] = import_export_price['Period'].dt.month
import_export_price.head()

Unnamed: 0,Period,Import_price($/Mcf),Export_price($/Mcf),Year,Month
0,2022-02-01,5.62,8.22,2022,2
1,2022-01-01,6.87,7.04,2022,1
2,2021-12-01,4.74,7.4,2021,12
3,2021-11-01,5.18,8.1,2021,11
4,2021-10-01,4.79,7.97,2021,10


In [15]:
#reorder columns
price_im_export_df = pd.DataFrame(data =import_export_price,
                                        columns={'Year':'Year',
                                                 'Month':'Month',
                                                'Import_price($/Mcf)': 'Import_price($/Mcf)',
                                               'Export_price($/Mcf)':'Export_price($/Mcf)'})

price_im_export_df

Unnamed: 0,Year,Month,Import_price($/Mcf),Export_price($/Mcf)
0,2022,2,5.62,8.22
1,2022,1,6.87,7.04
2,2021,12,4.74,7.40
3,2021,11,5.18,8.10
4,2021,10,4.79,7.97
...,...,...,...,...
393,1989,5,1.76,2.44
394,1989,4,1.73,2.33
395,1989,3,1.81,2.40
396,1989,2,1.88,2.40


In [16]:
price_im_export_df.dtypes

Year                     int64
Month                    int64
Import_price($/Mcf)    float64
Export_price($/Mcf)    float64
dtype: object

In [17]:
#filter data from 2001- current
price_im_export_df = price_im_export_df[(price_im_export_df.Year >= int(2001))]
price_im_export_df

Unnamed: 0,Year,Month,Import_price($/Mcf),Export_price($/Mcf)
0,2022,2,5.62,8.22
1,2022,1,6.87,7.04
2,2021,12,4.74,7.40
3,2021,11,5.18,8.10
4,2021,10,4.79,7.97
...,...,...,...,...
249,2001,5,4.95,5.50
250,2001,4,5.35,5.65
251,2001,3,5.42,4.93
252,2001,2,6.45,5.80


## Clean gas import and export data

In [18]:
#import data and read file
us_import = pd.read_csv('U.S._Natural_Gas_Imports.csv', skiprows=4)
us_import.head()

Unnamed: 0,Month,U.S. Natural Gas Imports Million Cubic Feet
0,Feb 2022,259389
1,Jan 2022,296179
2,Dec 2021,252626
3,Nov 2021,242405
4,Oct 2021,228203


In [19]:
us_export = pd.read_csv('U.S._Natural_Gas_Exports.csv', skiprows=4 )
us_export.head()

Unnamed: 0,Month,U.S. Natural Gas Exports Million Cubic Feet
0,Feb 2022,545563
1,Jan 2022,610102
2,Dec 2021,620886
3,Nov 2021,556982
4,Oct 2021,545055


In [20]:
#merge two dataframe on Month
us_ex_import = us_import.merge(us_export, how='left', on = 'Month')
us_ex_import.head()

Unnamed: 0,Month,U.S. Natural Gas Imports Million Cubic Feet,U.S. Natural Gas Exports Million Cubic Feet
0,Feb 2022,259389,545563
1,Jan 2022,296179,610102
2,Dec 2021,252626,620886
3,Nov 2021,242405,556982
4,Oct 2021,228203,545055


In [21]:
us_ex_import.isnull().sum()

Month                                           0
U.S. Natural Gas Imports  Million Cubic Feet    0
U.S. Natural Gas Exports  Million Cubic Feet    0
dtype: int64

In [22]:
us_ex_import.columns

Index(['Month', 'U.S. Natural Gas Imports  Million Cubic Feet',
       'U.S. Natural Gas Exports  Million Cubic Feet'],
      dtype='object')

In [23]:
#rename columns
us_ex_import = us_ex_import.rename(columns={
    'Month': 'Period', 'U.S. Natural Gas Imports  Million Cubic Feet': 'Total Imports (Mmcf)',
       'U.S. Natural Gas Exports  Million Cubic Feet': 'Total Exports (Mmcf)'})

us_ex_import.head()

Unnamed: 0,Period,Total Imports (Mmcf),Total Exports (Mmcf)
0,Feb 2022,259389,545563
1,Jan 2022,296179,610102
2,Dec 2021,252626,620886
3,Nov 2021,242405,556982
4,Oct 2021,228203,545055


In [24]:
## covert to date
us_ex_import['Period'] = pd.to_datetime(us_ex_import['Period'])
us_ex_import['Year'] = us_ex_import['Period'].dt.year
us_ex_import['Month'] = us_ex_import['Period'].dt.month
us_ex_import.head()

Unnamed: 0,Period,Total Imports (Mmcf),Total Exports (Mmcf),Year,Month
0,2022-02-01,259389,545563,2022,2
1,2022-01-01,296179,610102,2022,1
2,2021-12-01,252626,620886,2021,12
3,2021-11-01,242405,556982,2021,11
4,2021-10-01,228203,545055,2021,10


In [25]:
## create new dataframe to have us_ex_import data
Total_im_export_df = pd.DataFrame(data = us_ex_import,
                                        columns={'Year':'Year',
                                                 'Month':'Month',
                                                'Total Imports (Mmcf)': 'Total Imports (Mmcf)',
                                               'Total Exports (Mmcf)':'Total Exports (Mmcf)'})

Total_im_export_df

Unnamed: 0,Year,Month,Total Imports (Mmcf),Total Exports (Mmcf)
0,2022,2,259389,545563
1,2022,1,296179,610102
2,2021,12,252626,620886
3,2021,11,242405,556982
4,2021,10,228203,545055
...,...,...,...,...
585,1973,5,85844,8741
586,1973,4,88407,8017
587,1973,3,91581,4021
588,1973,2,83870,6079


In [26]:
Total_im_export_df = Total_im_export_df[(Total_im_export_df.Year >= int(2001))]
Total_im_export_df

Unnamed: 0,Year,Month,Total Imports (Mmcf),Total Exports (Mmcf)
0,2022,2,259389,545563
1,2022,1,296179,610102
2,2021,12,252626,620886
3,2021,11,242405,556982
4,2021,10,228203,545055
...,...,...,...,...
249,2001,5,321878,28981
250,2001,4,318573,23637
251,2001,3,358103,32121
252,2001,2,328289,26882


### Filter data by dates

In [27]:
import_export_data = pd.concat([price_im_export_df,Total_im_export_df], axis='columns')
import_export_data

Unnamed: 0,Year,Month,Import_price($/Mcf),Export_price($/Mcf),Year.1,Month.1,Total Imports (Mmcf),Total Exports (Mmcf)
0,2022,2,5.62,8.22,2022,2,259389,545563
1,2022,1,6.87,7.04,2022,1,296179,610102
2,2021,12,4.74,7.40,2021,12,252626,620886
3,2021,11,5.18,8.10,2021,11,242405,556982
4,2021,10,4.79,7.97,2021,10,228203,545055
...,...,...,...,...,...,...,...,...
249,2001,5,4.95,5.50,2001,5,321878,28981
250,2001,4,5.35,5.65,2001,4,318573,23637
251,2001,3,5.42,4.93,2001,3,358103,32121
252,2001,2,6.45,5.80,2001,2,328289,26882


In [28]:
import_export_df = import_export_data.loc[:,~import_export_data.columns.duplicated()]
import_export_df

Unnamed: 0,Year,Month,Import_price($/Mcf),Export_price($/Mcf),Total Imports (Mmcf),Total Exports (Mmcf)
0,2022,2,5.62,8.22,259389,545563
1,2022,1,6.87,7.04,296179,610102
2,2021,12,4.74,7.40,252626,620886
3,2021,11,5.18,8.10,242405,556982
4,2021,10,4.79,7.97,228203,545055
...,...,...,...,...,...,...
249,2001,5,4.95,5.50,321878,28981
250,2001,4,5.35,5.65,318573,23637
251,2001,3,5.42,4.93,358103,32121
252,2001,2,6.45,5.80,328289,26882


## Save to csv file

In [29]:
import_export_df.to_csv('import_export.csv')