In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, accuracy_score

# Train and evaluate a logistic-regression classifier for the 'Service' label
# using the pre-split ServiceTrain / ServiceTest CSV files.

# Step 1: Load the pre-split training and test datasets.
train_data = pd.read_csv('../datasets/ServiceTrain.csv')
test_data = pd.read_csv('../datasets/ServiceTest.csv')

# Step 2: Encode the target column 'Service' as 1 for 'Yes' and 0 otherwise.
# A vectorized comparison is used instead of a row-wise apply(lambda ...).
# Every value other than the exact string 'Yes' (missing values included)
# is encoded as 0. The column is overwritten in place on both frames.
train_data['Service'] = train_data['Service'].eq('Yes').astype('int64')
test_data['Service'] = test_data['Service'].eq('Yes').astype('int64')

# Step 3: Separate the independent features from the dependent feature.
# Every column except 'Service' is used as a feature.
X_train = train_data.drop(columns='Service')
y_train = train_data['Service']

X_test = test_data.drop(columns='Service')
y_test = test_data['Service']

# Step 4: Fit the logistic-regression model on the training split.
logreg = LogisticRegression(random_state=0)
logreg.fit(X_train, y_train)

# Step 5: Predict labels for the test split.
y_pred = logreg.predict(X_test)

# Step 6: Evaluate the predictions with a confusion matrix and accuracy score.
# Both metrics are called as (true labels, predicted labels).
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Report the confusion matrix and the accuracy as a percentage.
print("Confusion Matrix:")
print(conf_matrix)
print("\nAccuracy: {:.2f}%".format(accuracy * 100))

Confusion Matrix:
[[94  5]
 [ 7 29]]

Accuracy: 91.11%


In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import numpy as np

# Baseline multiple linear regression on the GHI report, scored by RMSE on a
# held-out test split.

# Step 1: Read the report into a DataFrame.
data = pd.read_csv('../datasets/GHI_Report.csv')

# Step 2: Pick the predictor columns and the target column.
# The target is the 'H_Score' column (presumably the happiness score --
# confirm against the dataset documentation).
feature_columns = ['Economy', 'Fam', 'Health', 'Freedom']
X = data[feature_columns]  # Independent features
y = data['H_Score']  # Dependent feature

# Step 3: Hold out 25% of the rows for testing; the fixed random_state makes
# the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=1,
)

# Step 4: Fit the multiple linear regression model (fit() returns the
# fitted estimator itself).
model = LinearRegression().fit(X_train, y_train)

# Step 5: Predict on the held-out rows and compute the RMSE as the square
# root of the mean squared error.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

# Report the RMSE rounded to two decimals.
print("RMSE of the baseline model: {:.2f}".format(rmse))

RMSE of the baseline model: 0.46
