In [None]:
import pandas as pd

In [None]:
def imputation(df):
    """Return ``df`` extended so that ``timestamp`` covers every calendar day.

    The ``timestamp`` column is parsed with ``pd.to_datetime`` and one row is
    added for each day between the earliest and the latest timestamp that is
    not already present. Added rows contain only the timestamp, so every other
    column is NaN for them; no values are filled in here. The result is sorted
    by ``timestamp`` and keeps the row labels of its inputs, so callers that
    need a clean 0..n-1 index should call ``reset_index(drop=True)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``timestamp`` column parseable by ``pd.to_datetime``.
        The frame passed in is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame sorted by ``timestamp``. An empty frame (or one whose
        timestamps are all missing) is returned with the column converted
        and no rows added.
    """
    # Work on a copy so the caller's frame keeps its original column.
    stamped = df.assign(timestamp=pd.to_datetime(df['timestamp']))

    first = stamped['timestamp'].min()
    last = stamped['timestamp'].max()
    if pd.isna(first) or pd.isna(last):
        # pd.date_range rejects NaT bounds; there is no range to fill anyway.
        return stamped

    my_range = pd.date_range(start=first, end=last, freq='D')
    missing_dates = my_range.difference(stamped['timestamp'])
    if missing_dates.empty:
        # Nothing to add; skip concatenating an empty frame.
        return stamped.sort_values('timestamp')

    # DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
    gaps = pd.DataFrame({'timestamp': missing_dates})
    return pd.concat([stamped, gaps]).sort_values('timestamp')

# BP

In [None]:
# Load the BP table, using the first CSV column as the row index, and parse
# the timestamp column into datetimes.
bp = (
    pd.read_csv('BP_final_data.csv', index_col=0)
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp']))
)
bp

Unnamed: 0,timestamp,avg_price,dividend_yield,earnings_yield,enterprise_value,crude_oil_price
0,2008-01-02,73.425193,0.034600,0.088200,2.591800e+11,99.640000
1,2008-01-03,74.997322,0.033900,0.086500,2.635900e+11,99.170000
2,2008-01-04,74.371617,0.034500,0.087800,2.601500e+11,97.900000
3,2008-01-05,74.544721,0.034267,0.087300,2.615400e+11,96.960000
4,2008-01-06,74.717824,0.034033,0.086800,2.629300e+11,96.020000
...,...,...,...,...,...,...
4953,2021-07-25,24.016204,0.052367,0.106367,1.293833e+11,71.963333
4954,2021-07-26,24.246797,0.051800,0.105200,1.302700e+11,71.910000
4955,2021-07-27,24.185154,0.052000,0.105500,1.300000e+11,71.650000
4956,2021-07-28,24.334298,0.051600,0.104900,1.305100e+11,72.390000


In [None]:
# Inspect each column's dtype after the timestamp conversion.
bp.dtypes

timestamp           datetime64[ns]
avg_price                  float64
dividend_yield             float64
earnings_yield             float64
enterprise_value           float64
crude_oil_price            float64
dtype: object

In [None]:
# Load the BP sentiment series (first CSV column is the row index) and give
# its two columns the names used by the rest of the notebook.
bp_sentiment = pd.read_csv('BP_sentiment.csv', index_col=0).set_axis(
    ['timestamp', 'sentiment'], axis=1
)
bp_sentiment

Unnamed: 0,timestamp,sentiment
0,2013-09-19,1.000000
1,2013-09-20,1.000000
2,2013-09-21,1.000000
3,2013-09-22,1.000000
4,2013-09-23,1.000000
...,...,...
2861,2021-07-20,1.000000
2862,2021-07-21,1.500000
2863,2021-07-22,1.000000
2864,2021-07-23,1.000000


In [None]:
# Reference daily calendar for the sentiment period (pd.date_range defaults to
# daily frequency when only start and end are given).
# NOTE(review): the end date is hard-coded and is one day after the last
# sentiment row displayed above (2021-07-23) — confirm this is intended.
pd.date_range('2013-09-19', '2021-07-24')

DatetimeIndex(['2013-09-19', '2013-09-20', '2013-09-21', '2013-09-22',
               '2013-09-23', '2013-09-24', '2013-09-25', '2013-09-26',
               '2013-09-27', '2013-09-28',
               ...
               '2021-07-15', '2021-07-16', '2021-07-17', '2021-07-18',
               '2021-07-19', '2021-07-20', '2021-07-21', '2021-07-22',
               '2021-07-23', '2021-07-24'],
              dtype='datetime64[ns]', length=2866, freq='D')

In [None]:
# Add a row for every calendar day missing from the sentiment series (those
# rows carry NaN sentiment), then renumber the rows 0..n-1.
bp_sentiment = bp_sentiment.pipe(imputation).reset_index(drop=True)
bp_sentiment

Unnamed: 0,timestamp,sentiment
0,2013-09-19,1.000000
1,2013-09-20,1.000000
2,2013-09-21,1.000000
3,2013-09-22,1.000000
4,2013-09-23,1.000000
...,...,...
2861,2021-07-20,1.000000
2862,2021-07-21,1.500000
2863,2021-07-22,1.000000
2864,2021-07-23,1.000000


In [None]:
# Inner join on timestamp: only dates present in both frames are kept.
final = pd.merge(bp, bp_sentiment, on='timestamp', how='inner')
final

Unnamed: 0,timestamp,avg_price,dividend_yield,earnings_yield,enterprise_value,crude_oil_price,sentiment
0,2013-09-19,42.318316,0.051000,0.190400,1.518100e+11,106.260000,1.000000
1,2013-09-20,42.598544,0.050900,0.189800,1.527000e+11,104.700000,1.000000
2,2013-09-21,42.496745,0.050967,0.190100,1.524900e+11,104.340000,1.000000
3,2013-09-22,42.394947,0.051033,0.190400,1.522800e+11,103.980000,1.000000
4,2013-09-23,42.293148,0.051100,0.190700,1.520700e+11,103.620000,1.000000
...,...,...,...,...,...,...,...
2861,2021-07-20,23.019519,0.054500,0.110700,1.261600e+11,67.244000,1.000000
2862,2021-07-21,23.930742,0.052700,0.107000,1.288600e+11,70.300000,1.500000
2863,2021-07-22,23.651055,0.053300,0.108300,1.279100e+11,71.910000,1.000000
2864,2021-07-23,23.555018,0.053500,0.108700,1.276100e+11,72.070000,1.000000


In [None]:
# Count missing values per column in the merged BP frame.
final.isna().sum()

timestamp           0
avg_price           0
dividend_yield      0
earnings_yield      0
enterprise_value    0
crude_oil_price     0
sentiment           0
dtype: int64

In [None]:
# Save the merged BP frame; to_csv writes the row index as the first column by default.
final.to_csv('BP_final_data_with_sentiment.csv')

# VLKAF

In [None]:
# Load the VLKAF table, using the first CSV column as the row index, and
# parse the timestamp column into datetimes.
vlkaf = (
    pd.read_csv('VLKAF_final_data.csv', index_col=0)
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp']))
)
vlkaf

Unnamed: 0,timestamp,avg_price,dividend_yield,earnings_yield,enterprise_value,steel_price
0,2009-12-01,122.000000,0.019483,0.042711,9.761894e+10,1617.235000
1,2009-12-02,122.000000,0.019598,0.042964,9.731362e+10,1648.840000
2,2009-12-03,124.000000,0.019713,0.043217,9.700830e+10,1644.425000
3,2009-12-04,122.406600,0.019828,0.043470,9.670298e+10,1628.870000
4,2009-12-05,121.675280,0.019943,0.043723,9.639766e+10,1626.441667
...,...,...,...,...,...,...
4344,2021-10-23,325.068853,0.034633,0.034633,2.277467e+11,1576.310000
4345,2021-10-24,327.850849,0.034367,0.034367,2.284533e+11,1588.580000
4346,2021-10-25,330.632845,0.034100,0.034100,2.291600e+11,1600.850000
4347,2021-10-26,341.457074,0.033000,0.033000,2.325300e+11,1621.665000


In [None]:
# Inspect each column's dtype after the timestamp conversion.
vlkaf.dtypes

timestamp           datetime64[ns]
avg_price                  float64
dividend_yield             float64
earnings_yield             float64
enterprise_value           float64
steel_price                float64
dtype: object

In [None]:
# Load the VLKAF sentiment series (first CSV column is the row index) and
# give its two columns the names used by the rest of the notebook.
vlkaf_sentiment = pd.read_csv('VLKAF_sentiment.csv', index_col=0).set_axis(
    ['timestamp', 'sentiment'], axis=1
)
vlkaf_sentiment

Unnamed: 0,timestamp,sentiment
0,2015-03-02,1.0
1,2015-03-03,1.0
2,2015-03-04,1.0
3,2015-03-05,1.0
4,2015-03-06,1.0
...,...,...
2425,2021-10-21,1.0
2426,2021-10-22,1.0
2427,2021-10-23,1.0
2428,2021-10-24,2.0


In [None]:
# Add a row for every calendar day missing from the sentiment series (those
# rows carry NaN sentiment), then renumber the rows 0..n-1.
vlkaf_sentiment = vlkaf_sentiment.pipe(imputation).reset_index(drop=True)
vlkaf_sentiment

Unnamed: 0,timestamp,sentiment
0,2015-03-02,1.0
1,2015-03-03,1.0
2,2015-03-04,1.0
3,2015-03-05,1.0
4,2015-03-06,1.0
...,...,...
2425,2021-10-21,1.0
2426,2021-10-22,1.0
2427,2021-10-23,1.0
2428,2021-10-24,2.0


In [None]:
# Inner join on timestamp: only dates present in both frames are kept.
final2 = pd.merge(vlkaf, vlkaf_sentiment, on='timestamp', how='inner')
final2

Unnamed: 0,timestamp,avg_price,dividend_yield,earnings_yield,enterprise_value,steel_price,sentiment
0,2015-03-02,250.675000,0.021900,0.116000,2.115300e+11,939.740,1.0
1,2015-03-03,247.416667,0.022200,0.117600,2.098400e+11,941.615,1.0
2,2015-03-04,244.275000,0.022300,0.118000,2.094400e+11,919.085,1.0
3,2015-03-05,245.522100,0.022300,0.118000,2.094400e+11,918.080,1.0
4,2015-03-06,247.700000,0.022100,0.117400,2.100800e+11,901.390,1.0
...,...,...,...,...,...,...,...
2425,2021-10-21,320.135005,0.035400,0.035400,2.255200e+11,1567.385,1.0
2426,2021-10-22,322.286857,0.034900,0.034900,2.270400e+11,1564.040,1.0
2427,2021-10-23,325.068853,0.034633,0.034633,2.277467e+11,1576.310,1.0
2428,2021-10-24,327.850849,0.034367,0.034367,2.284533e+11,1588.580,2.0


In [None]:
# Count missing values per column in the merged VLKAF frame.
final2.isna().sum()

timestamp           0
avg_price           0
dividend_yield      0
earnings_yield      0
enterprise_value    0
steel_price         0
sentiment           0
dtype: int64

In [None]:
# Save the merged VLKAF frame; to_csv writes the row index as the first column by default.
final2.to_csv('VLKAF_final_data_with_sentiment.csv')