In [1]:
%matplotlib inline

import os
import datetime

import IPython
import IPython.display
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

mpl.rcParams['figure.figsize'] = (8, 6)
mpl.rcParams['axes.grid'] = False



In [2]:
# Widen the pandas display limits for this notebook: show up to 500 columns
# and set the column-width limit to 0.
pd.set_option('display.max_columns', 500, 'display.max_colwidth', 0)

In [3]:
# Tickers this notebook can be run for; position 1 ("amzn") is the active one.
ticker_list = ["aapl", "amzn", "msft"]
ticker = ticker_list[1]
ticker_upper = ticker.upper()  # upper-case form, interpolated into the data paths
print(ticker_upper)

AMZN


In [4]:
alphavantage_csv_path = f"/app/StockPricePredictions/data/alphavantage/time_series_daily_adjusted/{ticker_upper}/{ticker_upper}.csv"

In [5]:
# Load the daily adjusted price history and put it in chronological order.
df = pd.read_csv(alphavantage_csv_path, low_memory=False)
df = df.sort_values(by=["date"], ascending=True)

# Keep 2010 onwards. "date" is still a string here, so this is a lexicographic
# comparison. The explicit .copy() makes the filtered frame independent, so the
# column assignments below do not write into a slice of the original frame
# (avoids pandas' SettingWithCopyWarning / chained-assignment ambiguity).
df = df[df["date"] >= '2010-01-01'].copy()

df["date_time"] = pd.to_datetime(df['date'])

# Monday=0 ... Sunday=6.
df["day_of_week"] = df["date_time"].dt.dayofweek

# Next row's adjusted close, aligned onto the current row.
df["adjusted_close_shift"] = df["5. adjusted close"].shift(-1)

# Percentage move from the current close to the next close.
df["percentage_change"] = (df["adjusted_close_shift"] - df["5. adjusted close"]) / df["5. adjusted close"] * 100.0

# Binary label: 1 when the next close is higher, otherwise 0.
# NOTE(review): the final row has no next close, so its percentage_change is
# NaN and its label silently becomes 0 - consider excluding that row before
# training or evaluating a model.
df["increase"] = (df["percentage_change"] > 0).astype("int64")

# Index by the date string; later cells look dates up in df.index.
df = df.set_index("date")

In [6]:
df.shape[0]

3064

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3064 entries, 2010-01-04 to 2022-03-04
Data columns (total 13 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   1. open               3064 non-null   float64       
 1   2. high               3064 non-null   float64       
 2   3. low                3064 non-null   float64       
 3   4. close              3064 non-null   float64       
 4   5. adjusted close     3064 non-null   float64       
 5   6. volume             3064 non-null   float64       
 6   7. dividend amount    3064 non-null   float64       
 7   8. split coefficient  3064 non-null   float64       
 8   date_time             3064 non-null   datetime64[ns]
 9   day_of_week           3064 non-null   int64         
 10  adjusted_close_shift  3063 non-null   float64       
 11  percentage_change     3063 non-null   float64       
 12  increase              3064 non-null   int64         
dtypes: datet

In [8]:
df.tail()
# df[df.percentage_change == 0]

Unnamed: 0_level_0,1. open,2. high,3. low,4. close,5. adjusted close,6. volume,7. dividend amount,8. split coefficient,date_time,day_of_week,adjusted_close_shift,percentage_change,increase
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2022-02-28,3048.5,3089.0,3017.0,3071.26,3071.26,2884171.0,0.0,1.0,2022-02-28,0,3022.84,-1.576552,0
2022-03-01,3054.65,3081.98,2999.54,3022.84,3022.84,2243679.0,0.0,1.0,2022-03-01,1,3041.05,0.602414,1
2022-03-02,3016.98,3059.99,2975.0,3041.05,3041.05,2366736.0,0.0,1.0,2022-03-02,2,2957.97,-2.731951,0
2022-03-03,3070.63,3078.58,2937.12,2957.97,2957.97,3259860.0,0.0,1.0,2022-03-03,3,2912.82,-1.526385,0
2022-03-04,2943.18,2957.0,2876.14,2912.82,2912.82,3049156.0,0.0,1.0,2022-03-04,4,,,0


In [9]:
df.increase.value_counts()

1    1623
0    1441
Name: increase, dtype: int64

In [10]:
# The day of the week with Monday=0, Sunday=6.

# 5 = Saturday
# 6 = Sunday
df.day_of_week.value_counts()

1    629
2    628
3    619
4    614
0    574
Name: day_of_week, dtype: int64

In [11]:
loc = df.index.get_loc('2010-01-05')
loc

print(loc)

df.iloc[loc]["date_time"].strftime("%Y-%m-%d")

1


'2010-01-05'

In [12]:
twint_csv_path = f"/app/StockPricePredictions/data/alphavantage/time_series_daily_adjusted/{ticker_upper}/{ticker}_finbert_twint_20100101_20220304.csv"

In [13]:
twint_csv_path

'/app/StockPricePredictions/data/alphavantage/time_series_daily_adjusted/AMZN/amzn_finbert_twint_20100101_20220304.csv'

In [14]:
df_twint = pd.read_csv(twint_csv_path, low_memory=False, lineterminator='\n')

In [15]:
df_twint.head()

Unnamed: 0,Headline,Stock,Positive,Negative,Neutral
0,Amazon Shoots Back At Apple With Kindle App Store $AMZN by @fromedome,2010-01-21,0.307876,0.422834,0.26929
1,"The Kindle Market Is ""Too Small"" For iPhone Developers $AMZN",2010-01-24,0.050117,0.100475,0.849408
2,Amazon Has Sold 3 Million Kindles $AMZN by @jwyarow,2010-01-29,0.055871,0.012929,0.9312
3,John Scalzi's comments on Amazon #EBook price control gambit via @LanceWeber $AMZN risks classic #blowback,2010-01-30,0.030953,0.033243,0.935805
4,The Apple-Amazon eBook War Begins: Amazon Deletes Macmillan Books $AMZN,2010-01-30,0.013072,0.801178,0.18575


In [16]:
df_twint['date'] = pd.to_datetime(df_twint['Stock'])

In [17]:
df_twint["day_of_week"] = df_twint["date"].dt.dayofweek

In [18]:
df_twint.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22512 entries, 0 to 22511
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   Headline     22512 non-null  object        
 1   Stock        22512 non-null  object        
 2   Positive     22512 non-null  float64       
 3   Negative     22512 non-null  float64       
 4   Neutral      22512 non-null  float64       
 5   date         22512 non-null  datetime64[ns]
 6   day_of_week  22512 non-null  int64         
dtypes: datetime64[ns](1), float64(3), int64(1), object(2)
memory usage: 1.2+ MB


In [19]:
df_twint.head(1)

Unnamed: 0,Headline,Stock,Positive,Negative,Neutral,date,day_of_week
0,Amazon Shoots Back At Apple With Kindle App Store $AMZN by @fromedome,2010-01-21,0.307876,0.422834,0.26929,2010-01-21,3


In [20]:
# The day of the week with Monday=0, Sunday=6.

# 5 = Saturday
# 6 = Sunday

df_twint.day_of_week.value_counts()

3    4320
4    3781
1    3685
0    3643
2    3509
6    1904
5    1670
Name: day_of_week, dtype: int64

In [21]:
df_twint.head(1)

Unnamed: 0,Headline,Stock,Positive,Negative,Neutral,date,day_of_week
0,Amazon Shoots Back At Apple With Kindle App Store $AMZN by @fromedome,2010-01-21,0.307876,0.422834,0.26929,2010-01-21,3


In [22]:
# Flag each tweet whose date string is present in the price frame's index.
df_twint["in_index"] = df_twint["Stock"].apply(lambda stock_date: stock_date in df.index)

In [23]:
df_twint[df_twint.in_index==False]["day_of_week"].value_counts()

6    1904
5    1670
0    171 
4    56  
3    45  
1    36  
2    14  
Name: day_of_week, dtype: int64

In [24]:
df_twint[df_twint.in_index==True]["day_of_week"].value_counts()

3    4275
4    3725
1    3649
2    3495
0    3472
Name: day_of_week, dtype: int64

In [25]:
df.index.get_loc('2010-01-04')


0

In [26]:
from datetime import date, datetime, timedelta

In [27]:
# Sanity check of the date arithmetic used for backfilling: parse a date
# string, step it back by two days and format it as a string again.
test2 = '2010-01-09'
dtobj1 = datetime.strptime(test2, "%Y-%m-%d")
print(dtobj1)

days = timedelta(days=2)
print(days)

dtobj2 = f"{dtobj1 - days:%Y-%m-%d}"
dtobj2

2010-01-09 00:00:00
2 days, 0:00:00


'2010-01-07'

In [28]:
def update_date(x):
    """Return the date a tweet row should be attributed to.

    Rows whose date is already in the price index (``x.in_index`` truthy) keep
    ``x.Stock``. For the remaining rows, Sunday, Saturday, Friday and Monday
    dates are stepped back 3, 2, 1 and 4 calendar days respectively (each lands
    on the preceding Thursday). If the stepped-back date exists in the index of
    the module-level price frame ``df``, that row's ``date_time`` is returned
    formatted as "YYYY-MM-DD". Tuesday-Thursday rows, and rows whose lookup
    fails, keep ``x.Stock`` unchanged.

    Args:
        x: Row-like object exposing ``in_index``, ``Stock`` (a "YYYY-MM-DD"
            string) and ``day_of_week`` (Monday=0 ... Sunday=6).

    Returns:
        The attributed date as a "YYYY-MM-DD" string.

    Raises:
        ValueError: If ``x.Stock`` is not a "YYYY-MM-DD" string and the row is
            not already in the index.

    Side effects:
        Prints every candidate date that is looked up and the error of every
        failed lookup; each failed lookup also increments the global
        ``ERROR_COUNTER``.
    """
    global ERROR_COUNTER

    if x.in_index:
        return x.Stock

    # Parsed before the weekday check so a malformed date always surfaces.
    dt_time = datetime.strptime(x.Stock, "%Y-%m-%d")

    # Calendar days to step back, keyed by weekday (Monday=0 ... Sunday=6).
    # NOTE(review): weekend tweets land on Thursday, not Friday - confirm that
    # this is the intended trading day.
    days_back_by_weekday = {6: 3, 5: 2, 4: 1, 0: 4}
    days_back = days_back_by_weekday.get(x.day_of_week)
    if days_back is None:
        # Tuesday-Thursday: no backfill rule, keep the original date.
        return x.Stock

    idx_lookup = (dt_time - timedelta(days_back)).strftime("%Y-%m-%d")
    print(idx_lookup)
    try:
        loc = df.index.get_loc(idx_lookup)
        return df.iloc[loc]["date_time"].strftime("%Y-%m-%d")
    except Exception as lookup_error:
        # Best effort: count and report the failure, then fall back to the
        # original date. (The Friday branch used to print the literal "3"
        # instead of the error.)
        ERROR_COUNTER += 1
        print(str(lookup_error))
        return x.Stock



In [29]:
# Weekday encoding used by update_date: Monday=0 ... Saturday=5, Sunday=6.

# Reset the failed-lookup counter that update_date increments.
ERROR_COUNTER = 0 

# Backfilling is currently switched off: every tweet keeps its original date
# string ("Stock"). To re-enable it, assign
# df_twint.apply(update_date, axis=1) here instead.
df_twint["backfill_date"] = df_twint["Stock"]


In [30]:
df_twint.head()

Unnamed: 0,Headline,Stock,Positive,Negative,Neutral,date,day_of_week,in_index,backfill_date
0,Amazon Shoots Back At Apple With Kindle App Store $AMZN by @fromedome,2010-01-21,0.307876,0.422834,0.26929,2010-01-21,3,True,2010-01-21
1,"The Kindle Market Is ""Too Small"" For iPhone Developers $AMZN",2010-01-24,0.050117,0.100475,0.849408,2010-01-24,6,False,2010-01-24
2,Amazon Has Sold 3 Million Kindles $AMZN by @jwyarow,2010-01-29,0.055871,0.012929,0.9312,2010-01-29,4,True,2010-01-29
3,John Scalzi's comments on Amazon #EBook price control gambit via @LanceWeber $AMZN risks classic #blowback,2010-01-30,0.030953,0.033243,0.935805,2010-01-30,5,False,2010-01-30
4,The Apple-Amazon eBook War Begins: Amazon Deletes Macmillan Books $AMZN,2010-01-30,0.013072,0.801178,0.18575,2010-01-30,5,False,2010-01-30


In [31]:
# Recompute the flag against the (possibly backfilled) date column.
df_twint["in_index"] = df_twint["backfill_date"].apply(lambda tweet_date: tweet_date in df.index)

In [32]:
df_twint[df_twint.in_index==False]["day_of_week"].value_counts()

6    1904
5    1670
0    171 
4    56  
3    45  
1    36  
2    14  
Name: day_of_week, dtype: int64

In [33]:
df_twint[df_twint.in_index==True].shape

(18616, 9)

In [34]:
# df_benzinga["text"] = df_benzinga["title"] + " " + df_benzinga["body"]

In [35]:
# df_benzinga['text'] = np.where(df_benzinga["text"], df_benzinga["title"], df_benzinga["text"])

In [36]:
# df_twint = df_twint[["backfill_date", "Positive", "Negative", "Neutral", "Retweets"]]
df_twint = df_twint[["backfill_date", "Positive", "Negative", "Neutral"]]

In [37]:
df_twint.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22512 entries, 0 to 22511
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   backfill_date  22512 non-null  object 
 1   Positive       22512 non-null  float64
 2   Negative       22512 non-null  float64
 3   Neutral        22512 non-null  float64
dtypes: float64(3), object(1)
memory usage: 703.6+ KB


In [38]:
df_twint.head()

Unnamed: 0,backfill_date,Positive,Negative,Neutral
0,2010-01-21,0.307876,0.422834,0.26929
1,2010-01-24,0.050117,0.100475,0.849408
2,2010-01-29,0.055871,0.012929,0.9312
3,2010-01-30,0.030953,0.033243,0.935805
4,2010-01-30,0.013072,0.801178,0.18575


In [39]:
df_twint.rename(columns={"backfill_date": "date"}, inplace=True)

In [40]:
def clean_tweet(df):
    """Replace every http(s) URL in ``df['text']`` with a single space.

    Args:
        df: DataFrame with a string column named ``text``.

    Returns:
        The same DataFrame object that was passed in; its ``text`` column is
        overwritten with the cleaned strings.
    """
    # Raw string so the backslash escapes reach the regex engine unchanged and
    # Python does not warn about invalid escape sequences.
    url_pattern = r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'

    # regex=True must be explicit: since pandas 2.0 str.replace treats the
    # pattern as a literal string by default, which would replace nothing here.
    df['text'] = df['text'].str.replace(url_pattern, ' ', regex=True)

    return df

In [41]:
# df_twint = clean_tweet(df_twint)

In [42]:
df_twint.set_index("date", inplace=True)

In [43]:
df_twint.head()

Unnamed: 0_level_0,Positive,Negative,Neutral
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-21,0.307876,0.422834,0.26929
2010-01-24,0.050117,0.100475,0.849408
2010-01-29,0.055871,0.012929,0.9312
2010-01-30,0.030953,0.033243,0.935805
2010-01-30,0.013072,0.801178,0.18575


In [44]:
df_twint_duplicated_index = df_twint[df_twint.index.duplicated(keep=False)]

In [45]:
df_twint_duplicated_index.shape[0]

21899

In [46]:
df_twint_nonduplicated_index = df_twint[~df_twint.index.duplicated(keep=False)]

In [47]:
df_twint_nonduplicated_index.shape[0]

613

In [48]:
# Collapse all tweets that share a date into one row holding the mean of each
# sentiment score. The "nuetral" spelling is kept on purpose: later cells
# select the column by that exact name.
df_twint_groupby = df_twint.groupby("date").agg(
    negative=pd.NamedAgg(column="Negative", aggfunc="mean"),
    nuetral=pd.NamedAgg(column="Neutral", aggfunc="mean"),
    positive=pd.NamedAgg(column="Positive", aggfunc="mean"),
)


In [49]:
df_twint_groupby

Unnamed: 0_level_0,negative,nuetral,positive
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-21,0.422834,0.269290,0.307876
2010-01-24,0.100475,0.849408,0.050117
2010-01-29,0.012929,0.931200,0.055871
2010-01-30,0.417210,0.560777,0.022012
2010-01-31,0.023506,0.931297,0.045197
...,...,...,...
2022-02-28,0.187972,0.719530,0.092498
2022-03-02,0.217341,0.625996,0.156664
2022-03-03,0.133639,0.581921,0.284441
2022-03-04,0.127485,0.811645,0.060871


In [50]:
df_merge = pd.merge(df, df_twint_groupby, how="inner", left_index=True, right_index=True)

In [51]:
df_merge.shape

(2416, 16)

In [52]:
df_merge.day_of_week.value_counts()

1    505
2    495
3    481
4    476
0    459
Name: day_of_week, dtype: int64

In [53]:
# df.index
# df_text.index
df_merge[df_merge["5. adjusted close"].isnull()]
# df_merge.shape

Unnamed: 0_level_0,1. open,2. high,3. low,4. close,5. adjusted close,6. volume,7. dividend amount,8. split coefficient,date_time,day_of_week,adjusted_close_shift,percentage_change,increase,negative,nuetral,positive
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1


In [54]:
df.shape

(3064, 13)

In [55]:
df_merge.shape

(2416, 16)

In [56]:
df.shape

(3064, 13)

In [57]:
df_merge[df_merge["negative"].isnull()]

Unnamed: 0_level_0,1. open,2. high,3. low,4. close,5. adjusted close,6. volume,7. dividend amount,8. split coefficient,date_time,day_of_week,adjusted_close_shift,percentage_change,increase,negative,nuetral,positive
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1


In [58]:
# df_merge.fillna(method="ffill", inplace=True)

In [59]:
df_merge[df_merge.negative.isnull()]

Unnamed: 0_level_0,1. open,2. high,3. low,4. close,5. adjusted close,6. volume,7. dividend amount,8. split coefficient,date_time,day_of_week,adjusted_close_shift,percentage_change,increase,negative,nuetral,positive
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1


In [60]:
 #df_merge.fillna(0.0, inplace=True)

In [61]:
# df_merge.to_csv(f"/app/StockPricePredictions/data/alphavantage/time_series_daily_adjusted/{ticker_upper}/{ticker_upper}_WITH_TWINT_FINBERT_SA.csv")
df_merge.head()

Unnamed: 0_level_0,1. open,2. high,3. low,4. close,5. adjusted close,6. volume,7. dividend amount,8. split coefficient,date_time,day_of_week,adjusted_close_shift,percentage_change,increase,negative,nuetral,positive
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2010-01-21,127.26,128.15,125.0,126.62,126.62,9970600.0,0.0,1.0,2010-01-21,3,121.43,-4.098879,0,0.422834,0.26929,0.307876
2010-01-29,129.77,131.8499,124.14,125.41,125.41,29471300.0,0.0,1.0,2010-01-29,4,118.87,-5.214895,0,0.012929,0.9312,0.055871
2010-02-03,117.12,119.61,116.56,119.1,119.1,12405900.0,0.0,1.0,2010-02-03,2,115.94,-2.653233,0,0.015519,0.927073,0.057408
2010-03-18,131.02,132.85,130.44,132.76,132.76,5021300.0,0.0,1.0,2010-03-18,3,130.35,-1.815306,0,0.114318,0.845012,0.04067
2010-03-19,133.71,133.71,129.66,130.35,130.35,8905000.0,0.0,1.0,2010-03-19,4,130.47,0.09206,1,0.313174,0.671043,0.015783


In [62]:
# df_merge = df_merge.sample(frac=1).reset_index(drop=True)

In [63]:
# X = df_merge[["negative", "nuetral", "positive", "retweets"]]
X = df_merge[["negative", "nuetral", "positive"]]
y = df_merge["increase"]

In [64]:
y.head()

date
2010-01-21    0
2010-01-29    0
2010-02-03    0
2010-03-18    0
2010-03-19    1
Name: increase, dtype: int64

In [65]:
# Positional hold-out split without shuffling: the first 90% of rows are used
# for training and the remaining rows for evaluation.
SPLIT = int(0.9 * len(df_merge))

X_train, X_test = X.iloc[:SPLIT], X.iloc[SPLIT:]
y_train, y_test = y.iloc[:SPLIT], y.iloc[SPLIT:]



In [66]:
X_train

Unnamed: 0_level_0,negative,nuetral,positive
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-21,0.422834,0.269290,0.307876
2010-01-29,0.012929,0.931200,0.055871
2010-02-03,0.015519,0.927073,0.057408
2010-03-18,0.114318,0.845012,0.040670
2010-03-19,0.313174,0.671043,0.015783
...,...,...,...
2021-02-22,0.159393,0.652302,0.188305
2021-02-23,0.170769,0.725952,0.103279
2021-02-24,0.128531,0.773069,0.098400
2021-02-25,0.154669,0.611396,0.233935


In [67]:
from sklearn.ensemble import RandomForestClassifier

In [68]:
# A fixed random_state makes the bootstrap samples and feature sub-sampling
# deterministic, so the metrics reported below are identical on every
# Restart & Run All instead of drifting from run to run.
rfc = RandomForestClassifier(n_estimators=200, criterion="entropy", random_state=42)
rfc.fit(X_train, y_train)

RandomForestClassifier(criterion='entropy', n_estimators=200)

In [69]:
preds = rfc.predict(X_test)

In [70]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [71]:
matrix = confusion_matrix(y_test, preds)

In [72]:
matrix

array([[54, 67],
       [47, 74]])

In [73]:
score = accuracy_score(y_test, preds)

In [74]:
score

0.5289256198347108

In [75]:
print(classification_report(y_test, preds))


              precision    recall  f1-score   support

           0       0.53      0.45      0.49       121
           1       0.52      0.61      0.56       121

    accuracy                           0.53       242
   macro avg       0.53      0.53      0.53       242
weighted avg       0.53      0.53      0.53       242



In [76]:
# corr_matrix = df_merge[["negative", "nuetral", "positive", "retweets", "increase"]].corr()
corr_matrix = df_merge[["negative", "nuetral", "positive", "increase"]].corr()
print (corr_matrix)

          negative   nuetral  positive  increase
negative  1.000000 -0.632322 -0.244630 -0.017452
nuetral  -0.632322  1.000000 -0.596483  0.020993
positive -0.244630 -0.596483  1.000000 -0.008193
increase -0.017452  0.020993 -0.008193  1.000000


In [77]:
from statsmodels.tsa.stattools import grangercausalitytests

In [78]:
# Perform a Granger-causality test for a single lag order of 63 (maxlag is
# passed as a one-element list, so only that lag is evaluated).
# NOTE(review): per the statsmodels documentation the test asks whether the
# SECOND column helps predict the FIRST, so this call checks
# "increase" -> "positive". If the goal is to test whether sentiment leads the
# price direction, the column order should presumably be
# ["increase", "positive"] - confirm before interpreting the p-values.
grangercausalitytests(df_merge[["positive", "increase"]], maxlag=[63])


Granger Causality
number of lags (no zero) 63
ssr based F test:         F=1.3124  , p=0.0519  , df_denom=2226, df_num=63
ssr based chi2 test:   chi2=87.3961 , p=0.0227  , df=63
likelihood ratio test: chi2=85.8122 , p=0.0296  , df=63
parameter F test:         F=1.3124  , p=0.0519  , df_denom=2226, df_num=63


{63: ({'ssr_ftest': (1.3123658671266663, 0.05188788193354628, 2226.0, 63),
   'ssr_chi2test': (87.39613826459563, 0.02271176439405267, 63),
   'lrtest': (85.81218791001083, 0.02962355996978053, 63),
   'params_ftest': (1.3123658671266691, 0.05188788193354628, 2226.0, 63.0)},
  [<statsmodels.regression.linear_model.RegressionResultsWrapper at 0x7f48d49d9940>,
   <statsmodels.regression.linear_model.RegressionResultsWrapper at 0x7f48d49d97f0>,
   array([[0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 0., 0., 0.],
          ...,
          [0., 0., 0., ..., 0., 0., 0.],
          [0., 0., 0., ..., 1., 0., 0.],
          [0., 0., 0., ..., 0., 1., 0.]])])}