In [1]:
from sklearn.datasets import load_boston
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

import pandas as pd
import numpy as np

In [2]:
# Load the raw dataset from the CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer='delhi_dataset.csv')

In [3]:
# Regression target: natural log of the cumulative confirmed-case count,
# kept as a one-column DataFrame.
target = np.log(data['Total_Confirmed_cases_till_date']).to_frame()

# Predictors: everything in `data` except the target and the columns listed
# below, plus the natural log of the recovered count as an extra column.
features = (
    data
    .drop(columns=[
        'Total_Confirmed_cases_till_date',
        'Date',
        'Name_of_state',
        'Population',
        'Latitude',
        'Longitude',
        'Recovered_till_date',
        'Mortality_Rate_in_percent',
        'Min_Temperature_in_Degrees',
        'Max_Temperature_in_Degrees',
    ])
    .assign(log_Recovered_till_date=np.log(data['Recovered_till_date']))
)
# Disabled alternative feature (log of the death count):
#features['log_death_till_date'] = np.log(data['Death_till_date'])

In [4]:
# Positions of each predictor inside the single-row feature vector handed to
# the model. They are looked up by column name rather than hard-coded so that
# they stay correct if the column order of `features` ever changes.
death_idx = features.columns.get_loc('Death_till_date')
test_idx = features.columns.get_loc('Total_Test_till_date')
fatality_idx = features.columns.get_loc('Case_Fatality_Rate_in_percent')
recover_idx = features.columns.get_loc('log_Recovered_till_date')

print(features.mean())
# Row vector (1 x n_features) of the column means; -1 lets numpy infer the
# number of columns instead of assuming exactly four.
features_stats = features.mean().values.reshape(1, -1)

Death_till_date                     170.481481
Total_Test_till_date             109009.129630
Case_Fatality_Rate_in_percent         0.018167
log_Recovered_till_date               7.403226
dtype: float64


In [5]:
# Ordinary least-squares fit of the log-target on the selected features.
regr = LinearRegression()
regr.fit(features, target)

# In-sample predictions: the model is scored on the same rows it was fit on.
fitted_vals = regr.predict(features)

# Root-mean-squared error of the fit, in the (log) units of `target`.
MSE = mean_squared_error(y_true=target, y_pred=fitted_vals)
RMSE = np.sqrt(MSE)

In [6]:
def get_log_estimate(deaths,test, fatality, recovered,  high_confidence = True):
    """
    Predict the model output (log of confirmed cases) for one set of inputs.

    Parameters:
    deaths : value stored in the `death_idx` slot of the feature row.
    test : value stored in the `test_idx` slot of the feature row.
    fatality : value stored in the `fatality_idx` slot of the feature row.
    recovered : recovered count; its natural log is stored in the
        `recover_idx` slot, so it must be strictly positive.
    high_confidence : if True the bounds are estimate +/- 2*RMSE and the
        reported interval is 95; otherwise estimate +/- RMSE and 68.

    Returns:
    Tuple (estimate, upper_bound, lower_bound, interval). The estimate and
    bounds are in the model's output scale, not exponentiated.

    Raises:
    ValueError : if `recovered` is not strictly positive.
    """
    # np.log of zero/negative input gives -inf/NaN, which the model cannot use.
    if recovered <= 0:
        raise ValueError(f'recovered must be greater than 0, got {recovered}')

    # Work on a copy so repeated calls never alter the shared module-level
    # `features_stats` array.
    row = features_stats.copy()
    row[0][death_idx] = deaths
    row[0][test_idx] = test
    row[0][fatality_idx] = fatality
    row[0][recover_idx] = np.log(recovered)

    # predict() returns a 2-D result (one row, one target column).
    estimate = regr.predict(row)[0][0]

    if high_confidence:
        upper_bound = estimate + 2*RMSE
        lower_bound = estimate - 2*RMSE
        interval = 95
    else:
        upper_bound = estimate + RMSE
        lower_bound = estimate - RMSE
        interval = 68

    return estimate, upper_bound, lower_bound, interval

In [7]:
def get_covid_estimate(deaths_till_date,total_test_till_date, fatality_rate, recovered_till_date,  large_range=True):

    """
    Print the estimated total number of confirmed coronavirus cases in Delhi,
    together with a range around that estimate.

        Parameters:
        deaths_till_date : Total number of deaths till date.
        total_test_till_date : Total number of coronavirus tests performed till date.
        fatality_rate : Ratio of the number of deaths till that date to the number of
            confirmed cases till that date.
        recovered_till_date : Total number of recovered patients till date.
        large_range : Passed to get_log_estimate as high_confidence. If True the
            printed range is the wider one (labelled 95 there), otherwise the
            narrower one (labelled 68).

        Returns:
        None. The estimate and its range are printed, not returned.
    """

    log_est, upper, lower, _ = get_log_estimate( deaths = deaths_till_date, test = total_test_till_date,
                                                 fatality= fatality_rate,
                                                 recovered = recovered_till_date, high_confidence = large_range)

    # get_log_estimate works in log space; exponentiate to get case counts.
    covid_cases = np.e**(log_est)
    upper = np.e**(upper)
    lower = np.e**(lower)
    print(f' Number of confirmed cases till date is {covid_cases}')
    print(f'The range of confirmed cases is from {round(lower,3)} to {round(upper,3)}')
    return

In [8]:
# Example run for one set of reported figures, using the default large_range=True.
get_covid_estimate(
    deaths_till_date=708,
    total_test_till_date=246873,
    fatality_rate=0.027,
    recovered_till_date=10315,
)

 Number of confirmed cases till date is 26652.73276387813
The range of confirmed cases is from 24632.094 to 28839.13
