A predictive model to predict the patients getting admitted in the near future, with respect to each state.

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Load dataset
covid = pd.read_csv('/content/covid.csv')

# Define response variable and predictor variables
response_variable = 'hospital_onset_covid'
predictor_variables = [
    'state',
    'previous_day_admission_adult_covid_confirmed',
    'previous_day_admission_pediatric_covid_confirmed',
    'staffed_icu_adult_patients_confirmed_covid',
    'total_adult_patients_hospitalized_confirmed_covid',
    'total_pediatric_patients_hospitalized_confirmed_covid'
    # Add other variables as needed
]

# Separate features (X) and target variable (y)
x = covid[predictor_variables]
y = covid[response_variable]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Handle missing values in the target variable
y_imputer = SimpleImputer(strategy='mean')
y_train = y_imputer.fit_transform(y_train.values.reshape(-1, 1)).ravel()
y_test = y_imputer.transform(y_test.values.reshape(-1, 1)).ravel()

# Create a column transformer for preprocessing
# This will handle one-hot encoding for categorical variables and scaling for numerical variables
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), x.select_dtypes(include='number').columns),
        ('cat', OneHotEncoder(handle_unknown='ignore'), ['state'])
    ])

# Create a linear regression model pipeline
model = Pipeline(steps=[('preprocessor', preprocessor),
                        ('regressor', LinearRegression())])

# Fit the model to the training data
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)
print(y_pred)

[-0.13069095  6.46233902  2.9151751   5.87702391  3.23081261  6.84857064
  1.66513335 50.76672259  2.65670041  1.54815561  7.33147185]
