In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score

# --- Load and prepare the outbreak time series --------------------------------
# Parse 'date' into datetimes, then derive 'day_number': the count of whole days
# elapsed since the earliest date present in the file.
df = pd.read_csv('disease_outbreak_data.csv').assign(
    date=lambda frame: pd.to_datetime(frame['date']),
    day_number=lambda frame: (frame['date'] - frame['date'].min()).dt.days,
)

# Report per-column missing-value counts before any row is discarded.
print("Missing values in dataset:")
print(df.isna().sum())

# Cleaning, in the same order as before: drop rows containing NaN, turn any
# +/-inf into NaN, then drop the rows that this conversion just invalidated.
df = df.dropna().replace([np.inf, -np.inf], np.nan).dropna()

# --- Features / target --------------------------------------------------------
# Single predictor (day_number) against the reported case count.
X, y = df[['day_number']], df['number_of_cases']

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Model --------------------------------------------------------------------
# Degree-2 polynomial expansion feeding an ordinary least-squares regressor.
# fit() returns the fitted pipeline itself, so it can be bound directly.
model = make_pipeline(
    PolynomialFeatures(degree=2),
    LinearRegression(),
).fit(X_train, y_train)

# --- Evaluation on the held-out rows ------------------------------------------
y_pred = model.predict(X_test)
mae, r2 = mean_absolute_error(y_test, y_pred), r2_score(y_test, y_pred)

print(f'MAE: {mae:.2f}, R2: {r2:.2f}')


Missing values in dataset:
date               5
number_of_cases    5
day_number         5
dtype: int64
MAE: 39.24, R2: 0.97


In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
# --- Load the bed-occupancy series and pick predictor / target ----------------
data = pd.read_csv('bed_occupancy_data.csv')
X, y = data[['day']], data['bed_occupancy']

# --- Train / test partition: 20% held out, seeded for repeatability -----------
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Fit a 100-tree random forest (seeded) ------------------------------------
# fit() returns the fitted estimator, so construction and training are chained.
rf_model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# --- Score the model on the held-out rows -------------------------------------
y_pred = rf_model.predict(X_test)
mae, r2 = mean_absolute_error(y_test, y_pred), r2_score(y_test, y_pred)

print(f'Mean Absolute Error: {mae:.2f}')
print(f'R2 Score: {r2:.2f}')

Mean Absolute Error: 0.95
R2 Score: 0.98


In [4]:
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
# Load the medication-effectiveness records.
data = pd.read_csv('medication_effectiveness_data.csv')

# Replace each medication label with an integer code so the tree can consume it.
# `le` is kept so the codes can be mapped back to labels later if needed.
# NOTE(review): scikit-learn documents LabelEncoder as a target (y) encoder; the
# integer codes carry no meaningful order. Consider OrdinalEncoder/OneHotEncoder
# for this input feature - TODO confirm whether the ordering matters here.
le = LabelEncoder()
data['medication'] = le.fit_transform(data['medication'])

# Predictors: encoded medication and treatment duration.
X = data[['medication', 'duration']]
# Binary target: True when the effectiveness score is strictly above 7.
y = data['effectiveness_score'] > 7 # Binary classification: Effective (True/False)

# Hold out 20% of the rows; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train classifier.
# The tree is seeded like the other models in this notebook (random_state=42):
# without a seed, ties between equally good splits are broken randomly, so the
# fitted tree - and therefore the printed predictions - can change between runs.
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)

# Predict the Effective / Not Effective label for the held-out rows.
y_pred = clf.predict(X_test)
print("Predictions (Effective/Not Effective):", y_pred)

Predictions (Effective/Not Effective): [ True  True False False]
