### Ridge Regression

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error
import plotly.express as px
from sklearn.linear_model import Ridge

In [3]:
# Load the preprocessed dataset; the path is relative to the working directory.
df = pd.read_csv(filepath_or_buffer="Datasets/preprocessed_dataset.csv")

In [4]:
# Remove the leftover "Unnamed: 0" column (presumably a row index saved with the
# CSV -- confirm against the preprocessing step).
# errors="ignore" keeps this cell re-runnable: `df` is reassigned here, so a
# second execution would otherwise raise KeyError because the column is gone.
df = df.drop(columns="Unnamed: 0", errors="ignore")
df

Unnamed: 0,age,sex,height,weight,smoking_history,previous_er_visit_within_14_days,admission_disposition,Hypertension,Chronic cardiac disease (not hypertension),Diabetes,...,Immunity Advance,Vesicare,Zaxine,Quinine Sulfate,Desvenlafaxine,Glucosamine,Turmeric,Cogentin Tab,Elavil,hospital_length_of_stay
0,0.485152,1,0.003233,0.001623,0.0,0,1,1,0,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,21
1,-0.308119,0,-1.497964,-0.362476,0.0,0,1,1,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5
2,-0.491182,0,0.003233,0.001623,0.0,0,1,1,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7
3,1.705571,1,1.989114,-0.766671,0.0,0,1,1,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9
4,1.522508,1,0.003233,0.001623,1.0,0,1,1,1,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.119027,0,-0.360602,-0.171303,0.0,1,1,1,0,1,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,2
502,-0.613224,0,-0.235617,-0.417097,0.0,0,1,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,13
503,0.485152,0,0.003233,-1.378426,0.0,1,1,0,0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,19
504,0.912299,1,-1.110511,-0.832216,0.0,1,1,1,0,1,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9


### Splitting X and y

In [5]:
from sklearn.model_selection import train_test_split

# Target is the length of stay; every remaining column is used as a feature.
y = df['hospital_length_of_stay']
X = df.drop(columns="hospital_length_of_stay")

# Hold out 30% of the rows for testing; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Report the size of each split.
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

X_train shape: (354, 745)
X_test shape: (152, 745)
y_train shape: (354,)
y_test shape: (152,)


### Implementing Ridge regression on the original dataset with a small regularization strength (alpha = 1)

In [8]:
# Fit Ridge with a weak penalty (alpha=1) on the training split.
ridge = Ridge(alpha=1).fit(X_train, y_train)

# Predictions and error on the data the model was fitted on.
y_pred_train = ridge.predict(X_train)
mse_y_train = mean_squared_error(y_train, y_pred_train)

# Predictions and error on the held-out data.
y_pred_test = ridge.predict(X_test)
mse_y_test = mean_squared_error(y_test, y_pred_test)

print("Mean Squared Error train:", mse_y_train)
print("Mean Squared Error test:", mse_y_test)

Mean Squared Error train: 37.39640727980008
Mean Squared Error test: 222.43033854644258


In [9]:
# Pair actual and predicted targets for each split.
df_train = pd.DataFrame({'y_train': y_train, 'y_pred_train': y_pred_train})
df_test = pd.DataFrame({'y_test': y_test, 'y_pred_test': y_pred_test})

# Training split: actual vs predicted.
fig = px.scatter(
    df_train,
    x='y_train',
    y='y_pred_train',
    title='y_train vs y_pred_train',
).update_layout(xaxis_title='y_train', yaxis_title='y_pred_train')
fig.show()

# Test split: actual vs predicted, drawn in green.
fig = px.scatter(
    df_test,
    x='y_test',
    y='y_pred_test',
    color_discrete_sequence=['green'],
    title='y_test vs y_pred_test',
).update_layout(xaxis_title='y_test', yaxis_title='y_pred_test')
fig.show()

### Implementing Ridge regression on the original dataset with a large regularization strength (alpha = 1000)

In [10]:
# Fit Ridge with a strong penalty (alpha=1000) on the training split.
ridge = Ridge(alpha=1000).fit(X_train, y_train)

# Predictions and error on the data the model was fitted on.
y_pred_train = ridge.predict(X_train)
mse_y_train = mean_squared_error(y_train, y_pred_train)

# Predictions and error on the held-out data.
y_pred_test = ridge.predict(X_test)
mse_y_test = mean_squared_error(y_test, y_pred_test)

print("Mean Squared Error train:", mse_y_train)
print("Mean Squared Error test:", mse_y_test)

Mean Squared Error train: 127.3814799415317
Mean Squared Error test: 171.9431982126735


In [11]:
# Pair actual and predicted targets for each split.
df_train = pd.DataFrame({'y_train': y_train, 'y_pred_train': y_pred_train})
df_test = pd.DataFrame({'y_test': y_test, 'y_pred_test': y_pred_test})

# Training split: actual vs predicted.
fig = px.scatter(
    df_train,
    x='y_train',
    y='y_pred_train',
    title='y_train vs y_pred_train',
).update_layout(xaxis_title='y_train', yaxis_title='y_pred_train')
fig.show()

# Test split: actual vs predicted, drawn in green.
fig = px.scatter(
    df_test,
    x='y_test',
    y='y_pred_test',
    color_discrete_sequence=['green'],
    title='y_test vs y_pred_test',
).update_layout(xaxis_title='y_test', yaxis_title='y_pred_test')
fig.show()

In [46]:
import numpy as np

# Create synthetic data.
# np.arange excludes its stop value, so both arrays hold the integers 1..9.
X = np.arange(1, 10).reshape(-1, 1)  # Feature matrix: values 1 to 9, shape (9, 1)
y = np.arange(1, 10)  # Target variable: values 1 to 9, shape (9,)


In [59]:
import numpy as np
from sklearn.model_selection import train_test_split

# Toy data: ten single-feature samples 1..10 with targets 11..20.
X = np.arange(1, 11).reshape(-1, 1)
y = np.arange(11, 21)

# Hold out 20% of the samples for testing; random_state=0 pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Show both splits, one item per line.
print("Training set:", "X_train:", X_train, "y_train:", y_train, sep="\n")
print("\nTesting set:", "X_test:", X_test, "y_test:", y_test, sep="\n")

Training set:
X_train:
[[ 5]
 [10]
 [ 2]
 [ 7]
 [ 8]
 [ 4]
 [ 1]
 [ 6]]
y_train:
[15 20 12 17 18 14 11 16]

Testing set:
X_test:
[[3]
 [9]]
y_test:
[13 19]


In [61]:
import numpy as np
from sklearn.model_selection import train_test_split

# Toy data: ten single-feature samples 1..10 with targets 11..20.
X = np.arange(1, 11).reshape(-1, 1)
y = np.arange(11, 21)

# Hold out 20% of the samples for testing; random_state=1 pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Show both splits, one item per line.
print("Training set:", "X_train:", X_train, "y_train:", y_train, sep="\n")
print("\nTesting set:", "X_test:", X_test, "y_test:", y_test, sep="\n")

Training set:
X_train:
[[7]
 [5]
 [1]
 [4]
 [2]
 [8]
 [9]
 [6]]
y_train:
[17 15 11 14 12 18 19 16]

Testing set:
X_test:
[[ 3]
 [10]]
y_test:
[13 20]


In [63]:
import numpy as np
from sklearn.model_selection import train_test_split

# Toy data: ten single-feature samples 1..10 with targets 11..20.
X = np.arange(1, 11).reshape(-1, 1)
y = np.arange(11, 21)

# Hold out 20% of the samples for testing; random_state=2 pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=2
)

# Show both splits, one item per line.
print("Training set:", "X_train:", X_train, "y_train:", y_train, sep="\n")
print("\nTesting set:", "X_test:", X_test, "y_test:", y_test, sep="\n")

Training set:
X_train:
[[ 6]
 [ 1]
 [ 8]
 [ 3]
 [ 4]
 [ 7]
 [10]
 [ 9]]
y_train:
[16 11 18 13 14 17 20 19]

Testing set:
X_test:
[[5]
 [2]]
y_test:
[15 12]
