In [1]:
import pandas as pd

## Preprocess data

In [2]:
#import data and read file
us_import_price = pd.read_csv('Price_of_U.S._Natural_Gas_Imports.csv', skiprows=4)
us_import_price.head()

Unnamed: 0,Month,Price of U.S. Natural Gas Imports Dollars per Thousand Cubic Feet
0,Feb 2022,5.62
1,Jan 2022,6.87
2,Dec 2021,4.74
3,Nov 2021,5.18
4,Oct 2021,4.79


In [3]:
us_import_price.count()

Month                                                                 398
Price of U.S. Natural Gas Imports  Dollars per Thousand Cubic Feet    398
dtype: int64

In [4]:
us_import_price.isnull().sum()

Month                                                                 0
Price of U.S. Natural Gas Imports  Dollars per Thousand Cubic Feet    0
dtype: int64

In [5]:
us_import_price.dtypes

Month                                                                  object
Price of U.S. Natural Gas Imports  Dollars per Thousand Cubic Feet    float64
dtype: object

In [6]:
#convert data type
us_import_price['Dates'] = pd.to_datetime(us_import_price['Month'])
us_import_price.head()

Unnamed: 0,Month,Price of U.S. Natural Gas Imports Dollars per Thousand Cubic Feet,Dates
0,Feb 2022,5.62,2022-02-01
1,Jan 2022,6.87,2022-01-01
2,Dec 2021,4.74,2021-12-01
3,Nov 2021,5.18,2021-11-01
4,Oct 2021,4.79,2021-10-01


In [7]:
us_import_price.columns

Index(['Month',
       'Price of U.S. Natural Gas Imports  Dollars per Thousand Cubic Feet',
       'Dates'],
      dtype='object')

In [8]:
us_import_price['year'] = us_import_price['Dates'].dt.year
us_import_price['month'] = us_import_price['Dates'].dt.month
us_import_price['import_price($/Mcf)'] = us_import_price['Price of U.S. Natural Gas Imports  Dollars per Thousand Cubic Feet']
us_import_price.head()

Unnamed: 0,Month,Price of U.S. Natural Gas Imports Dollars per Thousand Cubic Feet,Dates,year,month,import_price($/Mcf)
0,Feb 2022,5.62,2022-02-01,2022,2,5.62
1,Jan 2022,6.87,2022-01-01,2022,1,6.87
2,Dec 2021,4.74,2021-12-01,2021,12,4.74
3,Nov 2021,5.18,2021-11-01,2021,11,5.18
4,Oct 2021,4.79,2021-10-01,2021,10,4.79


In [9]:
#extract data to new df
gas_import_price = us_import_price.copy()
gas_import_price = gas_import_price.drop(columns = ['Month','Price of U.S. Natural Gas Imports  Dollars per Thousand Cubic Feet'])
gas_import_price.head()

Unnamed: 0,Dates,year,month,import_price($/Mcf)
0,2022-02-01,2022,2,5.62
1,2022-01-01,2022,1,6.87
2,2021-12-01,2021,12,4.74
3,2021-11-01,2021,11,5.18
4,2021-10-01,2021,10,4.79


## Clean gas export price

In [10]:
export_df = pd.read_csv('Price_of_U.S._Natural_Gas_exports.csv', skiprows=4)
export_df.head()

Unnamed: 0,Month,Price of U.S. Natural Gas Exports Dollars per Thousand Cubic Feet
0,Feb 2022,8.22
1,Jan 2022,7.04
2,Dec 2021,7.4
3,Nov 2021,8.1
4,Oct 2021,7.97


In [11]:
export_df.columns

Index(['Month', 'Price of U.S. Natural Gas Exports  Dollars per Thousand Cubic Feet'], dtype='object')

In [12]:
export_df.isnull().sum()

Month                                                                 0
Price of U.S. Natural Gas Exports  Dollars per Thousand Cubic Feet    0
dtype: int64

In [13]:
export_df['Dates'] = pd.to_datetime(export_df['Month'])
export_df['export_price($/Mcf)'] = export_df['Price of U.S. Natural Gas Exports  Dollars per Thousand Cubic Feet']
export_df.head()

Unnamed: 0,Month,Price of U.S. Natural Gas Exports Dollars per Thousand Cubic Feet,Dates,export_price($/Mcf)
0,Feb 2022,8.22,2022-02-01,8.22
1,Jan 2022,7.04,2022-01-01,7.04
2,Dec 2021,7.4,2021-12-01,7.4
3,Nov 2021,8.1,2021-11-01,8.1
4,Oct 2021,7.97,2021-10-01,7.97


In [14]:
export_price = export_df[['Dates','export_price($/Mcf)']]
export_price.head()

Unnamed: 0,Dates,export_price($/Mcf)
0,2022-02-01,8.22
1,2022-01-01,7.04
2,2021-12-01,7.4
3,2021-11-01,8.1
4,2021-10-01,7.97


## Merge export and import price

In [15]:
ex_import_price = gas_import_price.merge(export_price, how='left', on='Dates')
ex_import_price.head()

Unnamed: 0,Dates,year,month,import_price($/Mcf),export_price($/Mcf)
0,2022-02-01,2022,2,5.62,8.22
1,2022-01-01,2022,1,6.87,7.04
2,2021-12-01,2021,12,4.74,7.4
3,2021-11-01,2021,11,5.18,8.1
4,2021-10-01,2021,10,4.79,7.97


## Clean gas import and export data

In [16]:
#import data and read file
us_import = pd.read_csv('U.S._Natural_Gas_Imports.csv', skiprows=4)
us_import.head()

Unnamed: 0,Month,U.S. Natural Gas Imports Million Cubic Feet
0,Feb 2022,259389
1,Jan 2022,296179
2,Dec 2021,252626
3,Nov 2021,242405
4,Oct 2021,228203


In [17]:
us_export = pd.read_csv('U.S._Natural_Gas_Exports.csv', skiprows=4 )
us_export.head()

Unnamed: 0,Month,U.S. Natural Gas Exports Million Cubic Feet
0,Feb 2022,545563
1,Jan 2022,610102
2,Dec 2021,620886
3,Nov 2021,556982
4,Oct 2021,545055


In [18]:
#merge two dataframe on Month
us_ex_import = us_import.merge(us_export, how='left', on = 'Month')
us_ex_import.head()

Unnamed: 0,Month,U.S. Natural Gas Imports Million Cubic Feet,U.S. Natural Gas Exports Million Cubic Feet
0,Feb 2022,259389,545563
1,Jan 2022,296179,610102
2,Dec 2021,252626,620886
3,Nov 2021,242405,556982
4,Oct 2021,228203,545055


In [19]:
us_ex_import.isnull().sum()

Month                                           0
U.S. Natural Gas Imports  Million Cubic Feet    0
U.S. Natural Gas Exports  Million Cubic Feet    0
dtype: int64

In [20]:
us_ex_import.columns

Index(['Month', 'U.S. Natural Gas Imports  Million Cubic Feet',
       'U.S. Natural Gas Exports  Million Cubic Feet'],
      dtype='object')

In [21]:
## covert to date
us_ex_import['Dates'] = pd.to_datetime(us_ex_import['Month'])
us_ex_import['month'] = us_ex_import['Dates'].dt.year
us_ex_import['month'] = us_ex_import['Dates'].dt.month
us_ex_import['Total Imports (Mmcf)'] = us_ex_import['U.S. Natural Gas Imports  Million Cubic Feet']
us_ex_import['Total Exports (Mmcf)'] = us_ex_import['U.S. Natural Gas Exports  Million Cubic Feet']
us_ex_import.head()

Unnamed: 0,Month,U.S. Natural Gas Imports Million Cubic Feet,U.S. Natural Gas Exports Million Cubic Feet,Dates,month,Total Imports (Mmcf),Total Exports (Mmcf)
0,Feb 2022,259389,545563,2022-02-01,2,259389,545563
1,Jan 2022,296179,610102,2022-01-01,1,296179,610102
2,Dec 2021,252626,620886,2021-12-01,12,252626,620886
3,Nov 2021,242405,556982,2021-11-01,11,242405,556982
4,Oct 2021,228203,545055,2021-10-01,10,228203,545055


In [22]:
us_ex_import = us_ex_import.iloc[: , 3:]
us_ex_import.head()

Unnamed: 0,Dates,month,Total Imports (Mmcf),Total Exports (Mmcf)
0,2022-02-01,2,259389,545563
1,2022-01-01,1,296179,610102
2,2021-12-01,12,252626,620886
3,2021-11-01,11,242405,556982
4,2021-10-01,10,228203,545055


## Save new files for Total ex-import, ex-import price