### Importing modules and packages

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import scipy

### Read the entire spreadsheet into the variable 'xlsx'

In [None]:
# Open the raw workbook once with the openpyxl engine; the regression cell
# further down parses individual sheets from this handle.
raw_workbook_path = '/Users/ivan/Desktop/PycharmProjects/Data_Science_Mini/Data/raw/Weekly-covid-admissions-and-beds-publication-201203.xlsx'
xlsx = pd.ExcelFile(raw_workbook_path, engine='openpyxl')

### Extracting last date and assigning it to variable last_day

In [None]:
# Load the 'All beds COVID' sheet with pandas' default header handling so the
# row holding the dates can be addressed by position.
df = pd.read_excel(
    r'/Users/ivan/Desktop/PycharmProjects/Data_Science_Mini/Data/raw/Weekly-covid-admissions-and-beds-publication-201203.xlsx',
    sheet_name='All beds COVID',
    engine='openpyxl',
)

# Row 13 is the row the dates are read from; find its last non-empty cell.
date_row = df.iloc[13]
last_index = date_row.last_valid_index()

# Format that last datetime as a month/day/year string.
last_day = date_row[last_index].strftime('%m/%d/%Y')

## Reading in summary data from two sheets (All beds COVID and MV beds COVID)

In [None]:
# Values of the 'Name' column that are kept when filtering each sheet.
list_of_regions = [
    'East of England',
    'London',
    'Midlands',
    'North East and Yorkshire',
    'North West',
    'South East',
    'South West',
]

# Columns dropped from each sheet after the rows have been filtered.
columns_to_drop = [
    'Type 1 Acute?',
    'NHS England Region',
    'Code',
]


In [None]:

# Build the regional 'All beds COVID' table in one chain: read the sheet using
# row 14 as the header, keep only rows whose 'Name' is one of list_of_regions,
# drop the columns named in columns_to_drop, then renumber the rows from 0.
df_all_beds = (
    pd.read_excel(
        r'/Users/ivan/Desktop/PycharmProjects/Data_Science_Mini/Data/raw/Weekly-covid-admissions-and-beds-publication-201203.xlsx',
        sheet_name='All beds COVID',
        header=14,
        engine='openpyxl',
    )
    .loc[lambda sheet: sheet['Name'].isin(list_of_regions)]
    .drop(columns=columns_to_drop)
    .reset_index(drop=True)
)
df_all_beds

### Changing column headings to dates

In [None]:
# One 'dd-mm-YYYY' label per calendar day from the start date up to last_day.
# The start string is parsed month-first by pandas (i.e. 1 Aug 2020), the same
# field order last_day is formatted in ('%m/%d/%Y').
datelist = pd.date_range(start='08/01/2020', end=last_day).strftime('%d-%m-%Y')

# Relabel every column after the first ('Name') with its date, in order.
# zip() stops at the shorter of the two sequences, so a sheet with more data
# columns than dates no longer raises IndexError (the previous slice bound of
# len(datelist)+2 allowed one column too many), and a single rename() call
# replaces the per-column in-place renames.
df_all_beds.rename(columns=dict(zip(df_all_beds.columns[1:], datelist)),
                   inplace=True)


In [None]:
# Cell output: display df_all_beds as it stands at this point in the notebook.
df_all_beds

In [None]:
# Build the regional 'MV beds COVID' table in one chain: read the sheet using
# row 14 as the header, keep only rows whose 'Name' is one of list_of_regions,
# drop the columns named in columns_to_drop, then renumber the rows from 0.
df_MV = (
    pd.read_excel(
        r'/Users/ivan/Desktop/PycharmProjects/Data_Science_Mini/Data/raw/Weekly-covid-admissions-and-beds-publication-201203.xlsx',
        sheet_name='MV beds COVID',
        header=14,
        engine='openpyxl',
    )
    .loc[lambda sheet: sheet['Name'].isin(list_of_regions)]
    .drop(columns=columns_to_drop)
    .reset_index(drop=True)
)
df_MV.head()

In [None]:
# Relabel every column after the first ('Name') with its date from datelist,
# in order. zip() stops at the shorter of the two sequences, so a sheet with
# more data columns than dates no longer raises IndexError (the previous slice
# bound of len(datelist)+2 allowed one column too many), and a single rename()
# call replaces the per-column in-place renames.
df_MV.rename(columns=dict(zip(df_MV.columns[1:], datelist)), inplace=True)


In [None]:
# Cell output: display df_MV as it stands at this point in the notebook.
df_MV

### Comparison of the percentage of beds occupied by mechanically ventilated patients on two user-defined days

In [None]:

# Module-level table that mv_vs_all_beds() overwrites on every call.
summary_df = pd.DataFrame(columns=['Region', 'Mechanical Ventilation (MV)',
                                   'All Beds', 'Percentage on MV'])


def mv_vs_all_beds(date):
    """Fill ``summary_df`` for one day and draw it as a matplotlib table.

    For each row of ``df_MV`` / ``df_all_beds`` the table holds the region
    name, the MV-bed value, the all-beds value and MV as a percentage of all
    beds (rounded to one decimal place).

    Args:
        date: Column label to compare; the date columns are labelled
            ``dd-mm-yyyy`` strings.

    Raises:
        KeyError: If ``date`` is not a column of both ``df_MV`` and
            ``df_all_beds``.
    """
    if date not in df_MV.columns or date not in df_all_beds.columns:
        raise KeyError(f"No data column for date {date!r}; expected dd-mm-yyyy")

    # Use the function's own argument: the previous body read the global
    # `date_to_compare`, so the result silently depended on caller state.
    summary_df['Region'] = df_MV['Name']
    summary_df['Mechanical Ventilation (MV)'] = df_MV[date]
    summary_df['All Beds'] = df_all_beds[date]
    summary_df['Percentage on MV'] = (
        (summary_df['Mechanical Ventilation (MV)'] / summary_df['All Beds']) * 100
    ).round(1)

    # Render the summary as a titled table on an otherwise empty axes.
    fig = plt.figure(figsize=(16, 4))
    ax = fig.add_subplot(111)
    ax.table(cellText=summary_df.values,
             rowLabels=summary_df.index,
             colLabels=summary_df.columns,
             loc="center")
    ax.set_title(f"Percentage of MV beds on {date}")
    ax.axis("off")


# Surrounding whitespace is stripped so a stray space does not break the
# exact-match column lookup.
date_to_compare = input("Please enter the first date you would like to compare (dd-mm-yyyy)").strip()
mv_vs_all_beds(date_to_compare)

date_to_compare = input("Please enter the second date you would like to compare (dd-mm-yyyy)").strip()
mv_vs_all_beds(date_to_compare)





### Scatter plot of all beds vs MV beds (MV beds offset by 1 to 9 days later)


In [None]:
# Lagged simple linear regression: the total row of 'MV beds COVID', shifted
# i days later, is regressed on the total row of 'All beds COVID'; one model
# is fitted, scored and plotted per lag.
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics

# Lowest test-set RMSE seen so far and the lag (in days) that produced it.
# Start from +inf instead of a magic 1000 so any finite RMSE can win; it is
# kept as a NumPy scalar so later code can still call .round() on it.
smallest_RMSE = np.float64(np.inf)
day = 1

# Parse each sheet once -- the workbook does not change between iterations.
mv_sheet = pd.read_excel(xlsx, 'MV beds COVID')
all_beds_sheet = pd.read_excel(xlsx, 'All beds COVID')

# Extract the total row (row 14) from both sheets. Only positions 4 .. -2 are
# used, exactly the range the per-lag slices covered before.
# NOTE(review): assumes position 4 is the first daily value in both sheets --
# confirm against the workbook layout.
mv_daily = mv_sheet.iloc[14].iloc[4:-1]
all_beds_daily = all_beds_sheet.iloc[14].iloc[4:-1]

for i in range(1, 10):

    # Pair all-beds on day d with MV beds on day d + i. The earlier slices
    # ([3+i:-1] vs [4:-i]) paired day d with day d + i - 1, so the model
    # labelled "1 days difference" was really a same-day comparison.
    all_MV_beds = mv_daily.iloc[i:]
    all_beds_england = all_beds_daily.iloc[:-i]

    # need to reset the index here or NaNs will be introduced
    all_beds_vs_MV_beds = pd.concat([
        all_beds_england.reset_index(drop=True),
        all_MV_beds.reset_index(drop=True)],
        axis=1)
    all_beds_vs_MV_beds.columns = ['All Beds', 'MV Beds']

    # Linear Regression: single feature (All Beds) -> target (MV Beds)
    X = all_beds_vs_MV_beds.iloc[:, :-1]
    y = all_beds_vs_MV_beds.iloc[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

    # create an instance of the LinearRegression class
    ln1 = LinearRegression()

    # Train the regression model
    ln1.fit(X_train, y_train)

    # print the coefficients
    print(f"{i} days difference")
    print(f"The intercept: {ln1.intercept_}")
    print(f"The trained coefficient is: {ln1.coef_}")
    print()

    # predicting the test set
    y_pred = ln1.predict(X_test)

    # RMSE as the square root of the MSE: the `squared=False` keyword is
    # deprecated in newer scikit-learn releases. Arguments are passed in the
    # documented (y_true, y_pred) order.
    RMSE = np.sqrt(metrics.mean_squared_error(y_test, y_pred))
    print(f"RMSE = {RMSE}")
    if RMSE < smallest_RMSE:
        smallest_RMSE = RMSE
        day = i

    # Visualising the Training set results
    plt.scatter(X_train, y_train, color='green')
    plt.plot(X_train, ln1.predict(X_train), color='yellow')
    plt.title(f"All beds vs MV beds (Training set) for day {i}")
    plt.xlabel('All beds')
    plt.ylabel('MV beds')
    plt.show()

    # Visualising the Test set results
    plt.scatter(X_test, y_test, color='red')

    # The line is deliberately drawn from the training data: the fitted
    # regression line is the same whichever points are used to draw it.
    plt.plot(X_train, ln1.predict(X_train), color='pink')
    plt.title(f"All beds vs MV beds (Test set) {i} days difference")
    plt.xlabel('All beds')
    plt.ylabel('MV beds')
    plt.show()



In [None]:
# Report the best lag found by the regression loop. The builtin round() is
# used because `.round(1)` only exists on NumPy scalars and raises
# AttributeError when smallest_RMSE is a plain Python int or float.
print(f"The smallest RMSE is {round(float(smallest_RMSE), 1)} from {day} days difference")

### Does the linear regression show that after 1 day patients move onto an MV bed?

### Multiple Linear Regression
##### Independent variables 'All Beds' and 'Hospital Admissions and Diagnosis'
##### Dependent variable 'MV Beds'

In [None]:

# TODO: update this cell with the multiple linear regression described above.