**1-Multiple Linear Regression**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load the dataset
file_path = '/content/Housing.csv'
housing_data = pd.read_csv(file_path)

# Convert categorical variables to dummy variables
housing_data = pd.get_dummies(housing_data, drop_first=True)

# Define independent and dependent variables
X = housing_data.drop('price', axis=1)
y = housing_data['price']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the multiple linear regression model
mlr = LinearRegression()
mlr.fit(X_train, y_train)

# Predict on the test set
y_pred = mlr.predict(X_test)

# Evaluate the model
mae = mean_absolute_error(y_test, y_pred)
rmse = mean_squared_error(y_test, y_pred, squared=False)  # squared=False to get RMSE
r2 = r2_score(y_test, y_pred)
adjusted_r2 = 1 - (1-r2) * (len(y_test)-1) / (len(y_test) - X_test.shape[1] - 1)

# Print evaluation metrics
print(f"Mean Absolute Error (MAE): {mae}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"R-squared (R²): {r2}")
print(f"Adjusted R-squared: {adjusted_r2}")


Mean Absolute Error (MAE): 970043.4039201636
Root Mean Squared Error (RMSE): 1324506.9600914386
R-squared (R²): 0.6529242642153184
Adjusted R-squared: 0.6054296898447831


**2-Ridge Regression**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

# Load the dataset
file_path = '/content/Housing.csv'
housing_data = pd.read_csv(file_path)

# Convert categorical variables to dummy variables
housing_data = pd.get_dummies(housing_data, drop_first=True)

# Define independent and dependent variables
X = housing_data.drop('price', axis=1)
y = housing_data['price']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Fit the ridge regression model
ridge = Ridge(alpha=1.0)  # alpha is the regularization parameter
ridge.fit(X_train, y_train)

# Predict on the test set
y_pred = ridge.predict(X_test)

# Evaluate the model
mae = mean_absolute_error(y_test, y_pred)
rmse = mean_squared_error(y_test, y_pred, squared=False)
r2 = r2_score(y_test, y_pred)
adjusted_r2 = 1 - (1-r2) * (len(y_test)-1) / (len(y_test) - X_test.shape[1] - 1)

# Print evaluation metrics
print(f"Mean Absolute Error (MAE): {mae}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"R-squared (R²): {r2}")
print(f"Adjusted R-squared: {adjusted_r2}")


Mean Absolute Error (MAE): 969857.9028478259
Root Mean Squared Error (RMSE): 1324703.4866138187
R-squared (R²): 0.6528212603810125
Adjusted R-squared: 0.6053125907489405


**3-Lasso Regression**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

# Load the dataset
file_path = '/content/Housing.csv'
housing_data = pd.read_csv(file_path)

# Convert categorical variables to dummy variables
housing_data = pd.get_dummies(housing_data, drop_first=True)

# Define independent and dependent variables
X = housing_data.drop('price', axis=1)
y = housing_data['price']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Fit the lasso regression model
lasso = Lasso(alpha=1.0)  # alpha is the regularization parameter
lasso.fit(X_train, y_train)

# Predict on the test set
y_pred = lasso.predict(X_test)

# Evaluate the model
mae = mean_absolute_error(y_test, y_pred)
rmse = mean_squared_error(y_test, y_pred, squared=False)
r2 = r2_score(y_test, y_pred)
adjusted_r2 = 1 - (1-r2) * (len(y_test)-1) / (len(y_test) - X_test.shape[1] - 1)

# Print evaluation metrics
print(f"Mean Absolute Error (MAE): {mae}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"R-squared (R²): {r2}")
print(f"Adjusted R-squared: {adjusted_r2}")

# Print the coefficients
print(f"Coefficients: {lasso.coef_}")


Mean Absolute Error (MAE): 970043.4056972674
Root Mean Squared Error (RMSE): 1324507.4534974392
R-squared (R²): 0.6529240056296141
Adjusted R-squared: 0.6054293958736665
Coefficients: [ 519552.19145759   57349.25727342  521878.76074471  349250.9593994
  192005.64208111  128498.27282894   88768.30056116  187067.23793852
  149861.68687237  365157.15443591  266655.81624671  -62835.14662636
 -192014.11306384]


**4-Polynomial Regression**

In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, accuracy_score, precision_score, recall_score, f1_score

# Load the dataset
file_path = '/content/drive/MyDrive/AI_project_Dataset/Housing.csv'
data = pd.read_csv('/content/drive/MyDrive/AI_project_Dataset/Housing.csv')

# Assuming the target variable is 'price' and the rest are features
X = data.drop('price', axis=1)
y = data['price']

# Encoding categorical variables if any
X = pd.get_dummies(X)

# Scaling the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Transforming the features to polynomial features
poly = PolynomialFeatures(degree=2)  # Change the degree as needed
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# Applying Linear Regression on polynomial features
model = LinearRegression()
model.fit(X_train_poly, y_train)

# Making predictions
y_pred = model.predict(X_test_poly)

# Evaluating the model
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# For classification metrics, assuming a threshold
threshold = y.median()
y_test_class = (y_test > threshold).astype(int)
y_pred_class = (y_pred > threshold).astype(int)

accuracy = accuracy_score(y_test_class, y_pred_class)
precision = precision_score(y_test_class, y_pred_class)
recall = recall_score(y_test_class, y_pred_class)
f1 = f1_score(y_test_class, y_pred_class)

# Print the evaluation metrics
print(f"RMSE: {rmse}")
print(f"MAE: {mae}")
print(f"R2 Score: {r2}")
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-Score: {f1}")



RMSE: 1.0091366371397685e+17
MAE: 1.3669471460625156e+16
R2 Score: -2.014724700188259e+21
Accuracy: 0.7889908256880734
Precision: 0.8181818181818182
Recall: 0.7758620689655172
F1-Score: 0.7964601769911505


**5-Decision Tree Regression**

In [2]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, accuracy_score, precision_score, recall_score, f1_score

# Load the dataset
file_path = '/content/drive/MyDrive/AI_project_Dataset/Housing.csv'
data = pd.read_csv('/content/drive/MyDrive/AI_project_Dataset/Housing.csv')

# Assuming the target variable is 'price' and the rest are features
X = data.drop('price', axis=1)
y = data['price']

# Encoding categorical variables if any
X = pd.get_dummies(X)

# Scaling the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Applying Decision Tree Regression
model = DecisionTreeRegressor(random_state=42)
model.fit(X_train, y_train)

# Making predictions
y_pred = model.predict(X_test)

# Evaluating the model
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# For classification metrics, assuming a threshold
threshold = y.median()
y_test_class = (y_test > threshold).astype(int)
y_pred_class = (y_pred > threshold).astype(int)

accuracy = accuracy_score(y_test_class, y_pred_class)
precision = precision_score(y_test_class, y_pred_class)
recall = recall_score(y_test_class, y_pred_class)
f1 = f1_score(y_test_class, y_pred_class)

# Print the evaluation metrics
print(f"RMSE: {rmse}")
print(f"MAE: {mae}")
print(f"R2 Score: {r2}")
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-Score: {f1}")


RMSE: 1699626.144131854
MAE: 1234857.7981651376
R2 Score: 0.4284916641732388
Accuracy: 0.7981651376146789
Precision: 0.8333333333333334
Recall: 0.7758620689655172
F1-Score: 0.8035714285714286


**6-Random Forest Regression**

In [3]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, accuracy_score, precision_score, recall_score, f1_score

# Load the dataset
file_path = '/content/drive/MyDrive/AI_project_Dataset/Housing.csv'
data = pd.read_csv('/content/drive/MyDrive/AI_project_Dataset/Housing.csv')

# Assuming the target variable is 'price' and the rest are features
X = data.drop('price', axis=1)
y = data['price']

# Encoding categorical variables if any
X = pd.get_dummies(X)

# Scaling the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Applying Random Forest Regression
model = RandomForestRegressor(random_state=42, n_estimators=100)
model.fit(X_train, y_train)

# Making predictions
y_pred = model.predict(X_test)

# Evaluating the model
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# For classification metrics, assuming a threshold
threshold = y.median()
y_test_class = (y_test > threshold).astype(int)
y_pred_class = (y_pred > threshold).astype(int)

accuracy = accuracy_score(y_test_class, y_pred_class)
precision = precision_score(y_test_class, y_pred_class)
recall = recall_score(y_test_class, y_pred_class)
f1 = f1_score(y_test_class, y_pred_class)

# Print the evaluation metrics
print(f"RMSE: {rmse}")
print(f"MAE: {mae}")
print(f"R2 Score: {r2}")
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-Score: {f1}")


RMSE: 1404604.5124413748
MAE: 1024050.8876146789
R2 Score: 0.6096772346191941
Accuracy: 0.7889908256880734
Precision: 0.8070175438596491
Recall: 0.7931034482758621
F1-Score: 0.8


**Classification Models**



**1-Logistic Regression**

In [4]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error, mean_absolute_error

# Load the dataset
file_path = '/content/drive/MyDrive/AI_project_Dataset/Housing.csv'
data = pd.read_csv('/content/drive/MyDrive/AI_project_Dataset/Housing.csv')

# Assuming the target variable is 'price' and the rest are features
X = data.drop('price', axis=1)
y = data['price']

# Convert the target variable to a binary classification problem
threshold = y.median()
y_binary = (y > threshold).astype(int)

# Encoding categorical variables if any
X = pd.get_dummies(X)

# Scaling the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_binary, test_size=0.2, random_state=42)

# Applying Logistic Regression
model = LogisticRegression(random_state=42)
model.fit(X_train, y_train)

# Making predictions
y_pred = model.predict(X_test)
y_pred_prob = model.predict_proba(X_test)[:, 1]  # Probabilities for the positive class

# Evaluating the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
rmse = mean_squared_error(y_test, y_pred_prob, squared=False)
mae = mean_absolute_error(y_test, y_pred_prob)

# Print the evaluation metrics
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-Score: {f1}")
print(f"RMSE: {rmse}")
print(f"MAE: {mae}")


Accuracy: 0.8532110091743119
Precision: 0.92
Recall: 0.7931034482758621
F1-Score: 0.851851851851852
RMSE: 0.33572249520072167
MAE: 0.22364744753408405


**2-Multinomial Logistic Regression**

In [5]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error, mean_absolute_error
import numpy as np

# Load the dataset
file_path = '/content/drive/MyDrive/AI_project_Dataset/Housing.csv'
data = pd.read_csv('/content/drive/MyDrive/AI_project_Dataset/Housing.csv')

# Assuming the target variable is 'price' and the rest are features
X = data.drop('price', axis=1)
y = data['price']

# Convert the target variable to a multinomial classification problem
# For simplicity, we can use tertiles to create three classes: low, medium, high price
y_multinomial = pd.qcut(y, q=3, labels=[0, 1, 2])

# Encoding categorical variables if any
X = pd.get_dummies(X)

# Scaling the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_multinomial, test_size=0.2, random_state=42)

# Applying Multinomial Logistic Regression
model = LogisticRegression(random_state=42, multi_class='multinomial', solver='lbfgs')
model.fit(X_train, y_train)

# Making predictions
y_pred = model.predict(X_test)
y_pred_prob = model.predict_proba(X_test)

# Evaluating the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')
rmse = np.sqrt(mean_squared_error(pd.get_dummies(y_test), y_pred_prob))
mae = mean_absolute_error(pd.get_dummies(y_test), y_pred_prob)

# Print the evaluation metrics
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-Score: {f1}")
print(f"RMSE: {rmse}")
print(f"MAE: {mae}")


Accuracy: 0.7522935779816514
Precision: 0.7724869690103877
Recall: 0.7522935779816514
F1-Score: 0.7597119703933546
RMSE: 0.3367747532965083
MAE: 0.23045531609592293


**3-Gradient Boosting Machine**

In [6]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, accuracy_score, precision_score, recall_score, f1_score
import numpy as np

# Load the dataset
file_path = '/content/drive/MyDrive/AI_project_Dataset/Housing.csv'
data = pd.read_csv('/content/drive/MyDrive/AI_project_Dataset/Housing.csv')

# Assuming the target variable is 'price' and the rest are features
X = data.drop('price', axis=1)
y = data['price']

# Encoding categorical variables if any
X = pd.get_dummies(X)

# Scaling the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Applying Gradient Boosting Regression
model = GradientBoostingRegressor(random_state=42, n_estimators=100, learning_rate=0.1, max_depth=3)
model.fit(X_train, y_train)

# Making predictions
y_pred = model.predict(X_test)

# Evaluating the model
rmse = mean_squared_error(y_test, y_pred, squared=False)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# For classification metrics, assuming a threshold
threshold = y.median()
y_test_class = (y_test > threshold).astype(int)
y_pred_class = (y_pred > threshold).astype(int)

accuracy = accuracy_score(y_test_class, y_pred_class)
precision = precision_score(y_test_class, y_pred_class)
recall = recall_score(y_test_class, y_pred_class)
f1 = f1_score(y_test_class, y_pred_class)

# Print the evaluation metrics
print(f"RMSE: {rmse}")
print(f"MAE: {mae}")
print(f"R2 Score: {r2}")
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-Score: {f1}")


RMSE: 1302029.7157708362
MAE: 969452.6999702812
R2 Score: 0.6646042495954598
Accuracy: 0.7981651376146789
Precision: 0.8214285714285714
Recall: 0.7931034482758621
F1-Score: 0.8070175438596492


**4-Support Vector Machines**

In [7]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Load the dataset
file_path = '/content/drive/MyDrive/AI_project_Dataset/Housing.csv'
df = pd.read_csv('/content/drive/MyDrive/AI_project_Dataset/Housing.csv')

# Encode categorical features
label_encoders = {}
categorical_columns = df.select_dtypes(include=['object']).columns

for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Split the data into features and target
X = df.drop('price', axis=1)
y = df['price']

# Normalize the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Train the SVR model
svr = SVR()
svr.fit(X_train, y_train)

# Make predictions
y_pred = svr.predict(X_test)

# Evaluate the model
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Print evaluation results
print(f"Mean Absolute Error (MAE): {mae}")
print(f"Root Mean Squared Error (RMSE): {rmse}")


Mean Absolute Error (MAE): 1763889.461626673
Root Mean Squared Error (RMSE): 2359647.7903070115


**5-K-Nearest Neighbours (KNN)**

In [8]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_absolute_error, mean_squared_error
import numpy as np

# Load the dataset
file_path = '/content/drive/MyDrive/AI_project_Dataset/Housing.csv'
df = pd.read_csv('/content/drive/MyDrive/AI_project_Dataset/Housing.csv')

# Encode categorical features
label_encoders = {}
categorical_columns = df.select_dtypes(include=['object']).columns

for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Split the data into features and target
X = df.drop('price', axis=1)  # Replace 'price' with your target column for classification
y = df['price']

# Normalize the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Train the KNN model
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

# Make predictions
y_pred = knn.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Print evaluation results
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-score: {f1}")
print(f"Mean Absolute Error (MAE): {mae}")
print(f"Root Mean Squared Error (RMSE): {rmse}")


Accuracy: 0.0
Precision: 0.0
Recall: 0.0
F1-score: 0.0
Mean Absolute Error (MAE): 1553165.1376146788
Root Mean Squared Error (RMSE): 2173571.267206082


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
