In [99]:
import warnings
warnings.filterwarnings('ignore')

In [100]:
import numpy as np
import pandas as pd
from pathlib import Path
from collections import Counter


In [112]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from imblearn.metrics import classification_report_imbalanced

In [102]:
# Load the data
file_path = Path('masterfile.csv')
df = pd.read_csv(file_path)
df.head()

Unnamed: 0,date,deaths,cases,total_doses,cumulative_total_doses
0,7/27/2020,149,9087,5,6
1,7/28/2020,129,8799,6,12
2,7/29/2020,157,7812,9,21
3,7/30/2020,147,7239,13,34
4,7/31/2020,148,7215,6,40


# Split the data into Training and Testing

In [103]:
# Create our features
x = df['total_doses']
# Create our target
y = df[['deaths','cases']]

In [104]:
X.describe()

count       813.000000
mean      12207.087331
std       19657.526052
min         585.000000
25%        3093.000000
50%        5861.000000
75%       12652.000000
max      156620.000000
Name: cases, dtype: float64

In [105]:
# Check the balance of our target values
y.value_counts()

deaths  cases
0       748      1
86      76034    1
85      3760     1
        4329     1
        4630     1
                ..
35      11799    1
        18119    1
36      1995     1
        9013     1
716     38589    1
Length: 813, dtype: int64

In [106]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.33, random_state=42)
X_train.shape

(544,)

# Logistic Regression

In [113]:
## Create a Logistic Regression Model
classifier = LogisticRegression(solver='lbfgs',
                                max_iter=200,
                                random_state=1)

In [114]:
## Fit (train) or model using the training data
classifier.fit(X_train, y_train)

ValueError: Expected 2D array, got 1D array instead:
array=[  6094.   1196.   5212.   2523.   9842.   2548.   2364.   3683.   1190.
   2886.  86153.  20110.   6428.  22956.   1948.   6208.  44309.   1232.
   1308.   7070.  11410.  27319.  15490.  11081.  50435.   2339.   3347.
  10987.   3555.  27332.   5499.   5534.  17237.   5124.   4836.   6026.
   3251.    815.   3997.  10987.   6376.   5848.   6583.   6447.   3622.
   4255.   2337.  19123.  11623.   2062.   7313.  22028. 103995.  28343.
  25155.   9816.   3956.   1816.  10937.   5418.   6208.   7239.   5370.
   2274.   6999.  12390.  20107.   5274.   7385.   8718.  11252.   1063.
   7423.   6704.   2235.   5266.   2942.  13683.   2631.   1964.  23936.
   2020.  18859.   5253.   1603.   4915.   5367.  11718.  32658.   1440.
  15739.  17370.  15261.  26535.   3011.   3008. 100973.   1645.   2668.
  89357.   7962.   1372.   1714.   4557.  10314.  12335.  16124.  14170.
   1350.   2704.   9087.   1655.   7144.  21099.   8375.   1276.   7527.
  17438.   2498.  11451.   4240.   3509.   1874.    876.  21603.  41828.
   3545.  31939.  11606.  17895.   8494.   1993.   8639.    585.   1044.
   6786.   3372.   3131.  14105. 134823.   2385.   5148.    748.   3252.
  10624.   3349.   1995.   5299.   8460.   2976.  16613.   2980. 114207.
  19703.    920.   6784.   3302.   2475.  45384.  18189.   5211.   9976.
  14580.  60123.   3760.   5110.   2634.   4802.    705.   5861.   9013.
  45974.   8531.  48510.  13270.   2053.    678.   3665.  28981.  13955.
    850.   8877.   8416.   4294.  12390.  11297.    862.   5973.   4404.
   2244.  38589.   3676.  23574.   2710.    944.  10647.   7864.  32607.
   1218.  11990.  58083.  32700.   6373.   4735.   1680.   5792.   1633.
  17126.   5410.  19949.   8784.   4378.   3702.  11680.   2197.   1967.
   5156.  34505.   1152.   2277.  39861.   5891.   2294.   3467.   3773.
  12636.  16696.  54724.  19395.   6138.   4869.  13449.   1207.   4097.
   3848.   2459.  12615.   6016.  18745.  51912.   5387.   3849.   3227.
  12233.  35649.   3788.  31794.   5960.  23321.   2041.   3654.  22410.
  10689.   4630.   1525.  89862.   2830.   3115.   8928.   5303.  11728.
   1896.   4043.   2747.  13931.   6677.   4767.   9325.   1997.   6674.
  19406.  18119.   4426.   3800.   9691.   4225.  54794.   2386.  34795.
   2217.   2670.   2962.   9136.   2759.   9279.   6418.   7539.   4138.
  21095.   3452.  13457.   9957.   6190.   4180.  16891.   3736.   7226.
  15797.   4595.   5984.   2550.    981.   2628.   7008.   2108.   1276.
   2113.    997.   9467.  26969.  19466.  21100.  38801.   4265.  48549.
  21104.   2866.   5387.   1559.   3131.   6526.   1026.  12768.  18133.
  18261.  44706.   3034.   7732.   1102.    813.  12260.   9242.  17776.
   3527.   5160.   2430.   8853.  23384.  32009.  10988.   6520.   5866.
   7946.   3201.  14490.   3354.  10293.   9892.   3999.  15497.   3443.
   9798.   2127.   8897.   5831.   3924.  17657.   2510.  27072.   1442.
  14083.   2637.  15855.    928.   9302.  17604.   8359.   1566.   3631.
   5648.   1697.   9698.   8184.   3334.   5919.  18135.   4329.   3601.
   8449.   7426.   2770.   8331.   8196.   3060.  17810.   9955.  16342.
  23786.   5114.   4657.   2268.  37400.  56747.   4343. 119649.  34630.
  11546.  16305.  15294.   6823.   5532.  14065.   2676.   5287.   3952.
  38509.  24392.   4403.  15323.   7802.  14854.   3756.   7215.   4810.
  21240.   2443.  70488.  15380.   8472.   3599.   6659.   4244.   2345.
   3937.   6191.  41496.   2569.  21143.   1605.   4453.  16281.   1467.
   4909.  20713.   7544.   2273.   6633.  30859.   2071.   5657.   9357.
  33598.   4604.  88697.   3796.   7088.  53973.   2266.   7591.  16187.
  10070.   2186.   3305.   1886.  12854.   1231.  21037.   1552.  11292.
   3510.   7231.   1088.   1840.   4446.  60488.  27275.  18544.  12917.
  19773.   6432.  11109.   7015.  13401.   8799.   7274.  17896.   3912.
   5077.   2254.  11194.   7633.   3587.  11799.   5480.   2159.  12652.
   3751.   2292.   3178.   1039.   1246.  15037.   2104.  28558.   2944.
   9800.   4860.   3139.   8990.   1669.   5520.  16175.  18518.  15273.
  10788.   3260.  17721.  10865.   3770.   3463.  22319.   6555.   2874.
   5772.    641.   5639.  27232.   1662.  11532.  16100.  10419.   7167.
   2789.   3151.    597.  14632.  32031.   8991.   8266.  14318.   4174.
   6069.   1310.   3359.   5555.  20760.   1726.   2684.  22562.   3737.
  10921.   1820.   6252.   8528.].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [115]:
 ## Make predictions
y_pred = classifier.predict(X_test)
results = pd.DataFrame({"Prediction": y_pred, "Actual": y_test}).reset_index(drop=True)
results.head(20)

NotFittedError: This LogisticRegression instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.

In [None]:
# Predict Accuracy Score
print(accuracy_score(y_test, y_pred))

# Balanced Random Forest Classifier

In [108]:
# Resample the training data with the BalancedRandomForestClassifier
from imblearn.ensemble import BalancedRandomForestClassifier
brfc_model = BalancedRandomForestClassifier(n_estimators =100, random_state=1)
brfc_model.fit(X_train, y_train)

ValueError: Expected 2D array, got 1D array instead:
array=[  6094.   1196.   5212.   2523.   9842.   2548.   2364.   3683.   1190.
   2886.  86153.  20110.   6428.  22956.   1948.   6208.  44309.   1232.
   1308.   7070.  11410.  27319.  15490.  11081.  50435.   2339.   3347.
  10987.   3555.  27332.   5499.   5534.  17237.   5124.   4836.   6026.
   3251.    815.   3997.  10987.   6376.   5848.   6583.   6447.   3622.
   4255.   2337.  19123.  11623.   2062.   7313.  22028. 103995.  28343.
  25155.   9816.   3956.   1816.  10937.   5418.   6208.   7239.   5370.
   2274.   6999.  12390.  20107.   5274.   7385.   8718.  11252.   1063.
   7423.   6704.   2235.   5266.   2942.  13683.   2631.   1964.  23936.
   2020.  18859.   5253.   1603.   4915.   5367.  11718.  32658.   1440.
  15739.  17370.  15261.  26535.   3011.   3008. 100973.   1645.   2668.
  89357.   7962.   1372.   1714.   4557.  10314.  12335.  16124.  14170.
   1350.   2704.   9087.   1655.   7144.  21099.   8375.   1276.   7527.
  17438.   2498.  11451.   4240.   3509.   1874.    876.  21603.  41828.
   3545.  31939.  11606.  17895.   8494.   1993.   8639.    585.   1044.
   6786.   3372.   3131.  14105. 134823.   2385.   5148.    748.   3252.
  10624.   3349.   1995.   5299.   8460.   2976.  16613.   2980. 114207.
  19703.    920.   6784.   3302.   2475.  45384.  18189.   5211.   9976.
  14580.  60123.   3760.   5110.   2634.   4802.    705.   5861.   9013.
  45974.   8531.  48510.  13270.   2053.    678.   3665.  28981.  13955.
    850.   8877.   8416.   4294.  12390.  11297.    862.   5973.   4404.
   2244.  38589.   3676.  23574.   2710.    944.  10647.   7864.  32607.
   1218.  11990.  58083.  32700.   6373.   4735.   1680.   5792.   1633.
  17126.   5410.  19949.   8784.   4378.   3702.  11680.   2197.   1967.
   5156.  34505.   1152.   2277.  39861.   5891.   2294.   3467.   3773.
  12636.  16696.  54724.  19395.   6138.   4869.  13449.   1207.   4097.
   3848.   2459.  12615.   6016.  18745.  51912.   5387.   3849.   3227.
  12233.  35649.   3788.  31794.   5960.  23321.   2041.   3654.  22410.
  10689.   4630.   1525.  89862.   2830.   3115.   8928.   5303.  11728.
   1896.   4043.   2747.  13931.   6677.   4767.   9325.   1997.   6674.
  19406.  18119.   4426.   3800.   9691.   4225.  54794.   2386.  34795.
   2217.   2670.   2962.   9136.   2759.   9279.   6418.   7539.   4138.
  21095.   3452.  13457.   9957.   6190.   4180.  16891.   3736.   7226.
  15797.   4595.   5984.   2550.    981.   2628.   7008.   2108.   1276.
   2113.    997.   9467.  26969.  19466.  21100.  38801.   4265.  48549.
  21104.   2866.   5387.   1559.   3131.   6526.   1026.  12768.  18133.
  18261.  44706.   3034.   7732.   1102.    813.  12260.   9242.  17776.
   3527.   5160.   2430.   8853.  23384.  32009.  10988.   6520.   5866.
   7946.   3201.  14490.   3354.  10293.   9892.   3999.  15497.   3443.
   9798.   2127.   8897.   5831.   3924.  17657.   2510.  27072.   1442.
  14083.   2637.  15855.    928.   9302.  17604.   8359.   1566.   3631.
   5648.   1697.   9698.   8184.   3334.   5919.  18135.   4329.   3601.
   8449.   7426.   2770.   8331.   8196.   3060.  17810.   9955.  16342.
  23786.   5114.   4657.   2268.  37400.  56747.   4343. 119649.  34630.
  11546.  16305.  15294.   6823.   5532.  14065.   2676.   5287.   3952.
  38509.  24392.   4403.  15323.   7802.  14854.   3756.   7215.   4810.
  21240.   2443.  70488.  15380.   8472.   3599.   6659.   4244.   2345.
   3937.   6191.  41496.   2569.  21143.   1605.   4453.  16281.   1467.
   4909.  20713.   7544.   2273.   6633.  30859.   2071.   5657.   9357.
  33598.   4604.  88697.   3796.   7088.  53973.   2266.   7591.  16187.
  10070.   2186.   3305.   1886.  12854.   1231.  21037.   1552.  11292.
   3510.   7231.   1088.   1840.   4446.  60488.  27275.  18544.  12917.
  19773.   6432.  11109.   7015.  13401.   8799.   7274.  17896.   3912.
   5077.   2254.  11194.   7633.   3587.  11799.   5480.   2159.  12652.
   3751.   2292.   3178.   1039.   1246.  15037.   2104.  28558.   2944.
   9800.   4860.   3139.   8990.   1669.   5520.  16175.  18518.  15273.
  10788.   3260.  17721.  10865.   3770.   3463.  22319.   6555.   2874.
   5772.    641.   5639.  27232.   1662.  11532.  16100.  10419.   7167.
   2789.   3151.    597.  14632.  32031.   8991.   8266.  14318.   4174.
   6069.   1310.   3359.   5555.  20760.   1726.   2684.  22562.   3737.
  10921.   1820.   6252.   8528.].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [109]:
# Calculated the balanced accuracy score
from sklearn.metrics import balanced_accuracy_score
y_pred = brfc_model.predict(X_test)
balanced_accuracy_score(y_test, y_pred)

NotFittedError: This BalancedRandomForestClassifier instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.

In [110]:
# Display the confusion matrix
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_pred)

NameError: name 'y_pred' is not defined

In [None]:
# Print the imbalanced classification report
from imblearn.metrics import classification_report_imbalanced
print(classification_report_imbalanced(y_test, y_pred))