In [1]:
import pandas as pd
import numpy as np

In [2]:
data = pd.read_csv('C:/Users/omarn/Downloads/train.csv')
data.head()

Unnamed: 0,Id,Property_Type,Property_Area,Number_of_Windows,Number_of_Doors,Furnishing,Frequency_of_Powercuts,Power_Backup,Water_Supply,Traffic_Density_Score,Crime_Rate,Dust_and_Noise,Air_Quality_Index,Neighborhood_Review,Habitability_score
0,0x21e3,Apartment,106,,1,Semi_Furnished,0.0,No,Once in a day - Morning,5.89,Slightly below average,Medium,90.0,3.86,71.98
1,0x68d4,Apartment,733,2.0,2,Unfurnished,1.0,No,Once in a day - Evening,4.37,Well below average,Medium,96.0,3.55,71.2
2,0x7d81,Apartment,737,4.0,2,Fully Furnished,0.0,No,Once in a day - Morning,7.45,Slightly below average,Medium,121.0,3.81,71.39
3,0x7a57,Apartment,900,3.0,2,Unfurnished,2.0,Yes,Once in a day - Morning,6.16,Well above average,Medium,100.0,1.34,31.46
4,0x9409,Bungalow,2238,14.0,6,Fully Furnished,0.0,No,All time,5.46,Well below average,Medium,116.0,4.77,93.7


### Preprocessing

In [3]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

In [4]:
# Check for NaN values in the entire dataset
nan_values = data.isna().sum()
# Print columns with NaN values and the corresponding count of NaN values
print("Columns with NaN values:")
print(nan_values[nan_values > 0])

Columns with NaN values:
Number_of_Windows         1333
Furnishing                 828
Frequency_of_Powercuts    1109
Crime_Rate                 629
Dust_and_Noise             999
dtype: int64


In [5]:
# Imputing missing values for Number_of_Windows and Frequency_of_Powercuts
median_value = data['Number_of_Windows'].median()

data['Number_of_Windows'].fillna(median_value, inplace=True)

data['Frequency_of_Powercuts'].fillna(median_value, inplace=True)

data = data.dropna() # Drop other rows with Nan values

In [6]:
# Get the unique values of the 'Furnishing' column
unique_furnishing_values = data['Furnishing'].unique()

# Print the unique values
print("Unique values of the 'Furnishing' column:")
for value in unique_furnishing_values:
    print(value)


    from sklearn.preprocessing import OrdinalEncoder

# Drop NaN values from the 'Furnishing' column
data.dropna(subset=['Furnishing'], inplace=True)

# Define the mapping for ordinal encoding
ordinal_mapping = {
    'Semi_Furnished': 1,
    'Unfurnished': 0,
    'Fully Furnished': 2
}

# Apply ordinal encoding
data['Furnishing'] = data['Furnishing'].map(ordinal_mapping)

# Print the unique values to confirm encoding
print("Unique values of the 'Furnishing' column after ordinal encoding:")
print(data['Furnishing'].unique())

Unique values of the 'Furnishing' column:
Semi_Furnished
Unfurnished
Fully Furnished
Unique values of the 'Furnishing' column after ordinal encoding:
[1 0 2]


In [7]:
# Define the mapping for ordinal encoding
ordinal_mapping = {
    'Well below average': 3,
    'Slightly below average': 2,
    'Slightly above average': 1,
    'Well above average': 0
}

# Apply ordinal encoding
data['Crime_Rate'] = data['Crime_Rate'].map(ordinal_mapping)

# Print the unique values to confirm encoding
print("Unique values of the 'Crime_Rate' column after ordinal encoding:")
print(data['Crime_Rate'].unique())

# Define the mapping for ordinal encoding
ordinal_mapping = {
    'Low': 2,
    'Medium': 1,
    'High': 0
}

# Apply ordinal encoding
data['Dust_and_Noise'] = data['Dust_and_Noise'].map(ordinal_mapping)

# Print the unique values to confirm encoding
print("Unique values of the 'Dust_and_Noise' column after ordinal encoding:")
print(data['Dust_and_Noise'].unique())

Unique values of the 'Crime_Rate' column after ordinal encoding:
[2 3 0 1]
Unique values of the 'Dust_and_Noise' column after ordinal encoding:
[1 0 2]


In [8]:
unique_count = data['Power_Backup'].unique()

print("Number of unique values in 'Power_Backup':", unique_count)

# Drop rows with 'NOT MENTIONED' in 'Power_Backup' column
data = data[data['Power_Backup'] != 'NOT MENTIONED']

# Replace 'No' with 0 and 'Yes' with 1
data['Power_Backup'] = data['Power_Backup'].replace({'No': 0, 'Yes': 1})

# Count the number of unique values after transformation
unique_values_after_transformation = data['Power_Backup'].nunique()

print("Number of unique values in 'Power_Backup' after transformation:", unique_values_after_transformation)

Number of unique values in 'Power_Backup': ['No' 'Yes' 'NOT MENTIONED']
Number of unique values in 'Power_Backup' after transformation: 2


In [9]:
data = data[data['Water_Supply'] != 'NOT MENTIONED']

# Encode the remaining values
data['Water_Supply'] = data['Water_Supply'].map({
    'Once in a day - Morning': 1,
    'Once in a day - Evening': 1,
    'All time': 2,
    'Once in two days': 0
})

# Count the number of unique values after transformation
unique_values_after_transformation = data['Water_Supply'].nunique()

print("Number of unique values in 'Water_Supply' after transformation:", unique_values_after_transformation)

Number of unique values in 'Water_Supply' after transformation: 3


In [10]:
# One-Hot Encoding with pandas.get_dummies
categorical_cols = ['Property_Type' ]
data = pd.get_dummies(data, columns=categorical_cols)
data.drop(columns=['Id'], inplace=True)

In [11]:
print(data.head())

   Property_Area  Number_of_Windows  Number_of_Doors  Furnishing  \
0            106                4.0                1           1   
1            733                2.0                2           0   
2            737                4.0                2           2   
3            900                3.0                2           0   
4           2238               14.0                6           2   

   Frequency_of_Powercuts  Power_Backup  Water_Supply  Traffic_Density_Score  \
0                     0.0             0             1                   5.89   
1                     1.0             0             1                   4.37   
2                     0.0             0             1                   7.45   
3                     2.0             1             1                   6.16   
4                     0.0             0             2                   5.46   

   Crime_Rate  Dust_and_Noise  Air_Quality_Index  Neighborhood_Review  \
0           2               1        

In [12]:
X = data.drop(columns="Habitability_score")
y = data["Habitability_score"]

In [13]:
# Scaling features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

### Model Training

In [14]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score,GridSearchCV
from sklearn.svm import SVR
import pickle

In [15]:
# Define the parameter grid for GridSearchCV
param_grid = {
    'kernel': ['linear', 'rbf', 'poly'],  # Different kernel types
    'C': [0.1, 1, 10],  # Regularization parameter
    'degree': [2, 3],  # Degree for polynomial kernel (if applicable)
    'gamma': ['auto', 0.1, 1]  # Gamma for RBF kernel (if applicable)
}

In [16]:
svr = SVR()

n_splits = 7
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
grid_search = GridSearchCV(svr, param_grid, scoring='neg_mean_squared_error', cv=n_splits)

In [17]:
scores = []
for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Fit scaler to the training features and transform both training and testing features
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Fit SVR model to the scaled training data
    svr.fit(X_train_scaled, y_train)

    # Evaluate SVR model on the scaled testing data
    score = svr.score(X_test_scaled, y_test)
    scores.append(score)

with open('ML_OLY_SVR_Model.pkl', 'wb') as f: # Save model
    pickle.dump(svr, f)

In [18]:
# Print the cross-validation scores
print("Cross-validation scores:", scores)

# Print the mean and standard deviation of the cross-validation scores
print("Mean cross-validation score:", np.mean(scores))
print("Standard deviation of cross-validation scores:", np.std(scores))

Cross-validation scores: [0.715815552414568, 0.7143435658490769, 0.7117546158053334, 0.7274448199028403, 0.7003615511223367, 0.7014731967732438, 0.7322342008174084]
Mean cross-validation score: 0.714775357526401
Standard deviation of cross-validation scores: 0.011086833321738125


In [19]:
import tkinter as tk
from tkinter import filedialog, messagebox, Label, Button, Entry
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.svm import SVR
import pickle

In [20]:
# Load the trained SVR model from file
with open('ML_OLY_SVR_Model.pkl', 'rb') as f:
    svr_loaded = pickle.load(f)

def predict():
    try:
       # Get input from the entry widgets
        Property_Area = property_area_entry.get()
        Number_of_Windows = num_windows_entry.get()
        Number_of_Doors = num_doors_entry.get()
        Furnishing = furnishing_entry.get()
        Frequency_of_Powercuts = frequency_entry.get()
        Power_Backup = power_backup_entry.get()
        Water_Supply = water_supply_entry.get()
        Traffic_Density_Score = traffic_density_entry.get()
        Crime_Rate = crime_rate_entry.get()
        Dust_and_Noise = dust_noise_entry.get()
        Air_Quality_Index = air_quality_entry.get()
        Neighborhood_Review = neighborhood_review_entry.get()
        Habitability_score = habitability_score_entry.get()
        feature1 =  feature1_entry.get()
        feature2 =  feature2_entry.get()
        feature3 =  feature3_entry.get()
        feature4 =  feature4_entry.get()
        feature5 =  feature5_entry.get()

        # Prepare input data for prediction
        user_input = np.array([[Property_Area, Number_of_Windows, Number_of_Doors, Furnishing,
                                Frequency_of_Powercuts, Power_Backup, Water_Supply, Traffic_Density_Score,
                                Crime_Rate, Dust_and_Noise, Air_Quality_Index, Neighborhood_Review,
                                Habitability_score , feature1 , feature2 , feature3, feature4 , feature5]])
        
        # Use the loaded model to make predictions
        predicted_value = svr_loaded.predict(user_input)
        
        # Show the predicted value
        prediction_label.config(text=f"Predicted value: {predicted_value[0]}")
    except Exception as e:
        messagebox.showerror("Error", f"An error occurred: {str(e)}")

In [43]:
window = tk.Tk()
window.title("SVR Prediction")
window.configure(bg="light blue")

# Property Area
property_area_label = tk.Label(window, text="Property Area:", bg="light blue")
property_area_label.grid(row=1, column=0)
property_area_entry = tk.Entry(window, bg="light blue")
property_area_entry.grid(row=1, column=1)

# Number of Windows
num_windows_label = tk.Label(window, text="Number of Windows:", bg="light blue")
num_windows_label.grid(row=2, column=0)
num_windows_entry = tk.Entry(window, bg="light blue")
num_windows_entry.grid(row=2, column=1)

# Number of Doors
num_doors_label = tk.Label(window, text="Number of Doors:", bg="light blue")
num_doors_label.grid(row=3, column=0)
num_doors_entry = tk.Entry(window, bg="light blue")
num_doors_entry.grid(row=3, column=1)

# Furnishing
furnishing_label = tk.Label(window, text="Furnishing:", bg="light blue")
furnishing_label.grid(row=4, column=0)
furnishing_entry = tk.Entry(window, bg="light blue")
furnishing_entry.grid(row=4, column=1)

# Frequency of Powercuts
frequency_label = tk.Label(window, text="Frequency of Powercuts:", bg="light blue")
frequency_label.grid(row=5, column=0)
frequency_entry = tk.Entry(window, bg="light blue")
frequency_entry.grid(row=5, column=1)

# Power Backup
power_backup_label = tk.Label(window, text="Power Backup:", bg="light blue")
power_backup_label.grid(row=6, column=0)
power_backup_entry = tk.Entry(window, bg="light blue")
power_backup_entry.grid(row=6, column=1)

# Water Supply
water_supply_label = tk.Label(window, text="Water Supply:", bg="light blue")
water_supply_label.grid(row=7, column=0)
water_supply_entry = tk.Entry(window, bg="light blue")
water_supply_entry.grid(row=7, column=1)

# Traffic Density Score
traffic_density_label = tk.Label(window, text="Traffic Density Score:", bg="light blue")
traffic_density_label.grid(row=8, column=0)
traffic_density_entry = tk.Entry(window, bg="light blue")
traffic_density_entry.grid(row=8, column=1)

# Crime Rate
crime_rate_label = tk.Label(window, text="Crime Rate:", bg="light blue")
crime_rate_label.grid(row=9, column=0)
crime_rate_entry = tk.Entry(window, bg="light blue")
crime_rate_entry.grid(row=9, column=1)

# Dust and Noise
dust_noise_label = tk.Label(window, text="Dust and Noise:", bg="light blue")
dust_noise_label.grid(row=10, column=0)
dust_noise_entry = tk.Entry(window, bg="light blue")
dust_noise_entry.grid(row=10, column=1)

# Air Quality Index
air_quality_label = tk.Label(window, text="Air Quality Index:", bg="light blue")
air_quality_label.grid(row=11, column=0)
air_quality_entry = tk.Entry(window, bg="light blue")
air_quality_entry.grid(row=11, column=1)

# Neighborhood Review
neighborhood_review_label = tk.Label(window, text="Neighborhood Review:", bg="light blue")
neighborhood_review_label.grid(row=12, column=0)
neighborhood_review_entry = tk.Entry(window, bg="light blue")
neighborhood_review_entry.grid(row=12, column=1)

# Habitability Score
habitability_score_label = tk.Label(window, text="Habitability Score:", bg="light blue")
habitability_score_label.grid(row=13, column=0)
habitability_score_entry = tk.Entry(window, bg="light blue")
habitability_score_entry.grid(row=13, column=1)

# Feature
feature1_label = tk.Label(window, text="Feature1:", bg="light blue")
feature1_label.grid(row=14, column=0)
feature1_entry = tk.Entry(window, bg="light blue")
feature1_entry.grid(row=14, column=1)

# Feature
feature2_label = tk.Label(window, text="Feature2:", bg="light blue")
feature2_label.grid(row=15, column=0)
feature2_entry = tk.Entry(window, bg="light blue")
feature2_entry.grid(row=15, column=1)

# Feature
feature3_label = tk.Label(window, text="Feature3:", bg="light blue")
feature3_label.grid(row=16, column=0)
feature3_entry = tk.Entry(window, bg="light blue")
feature3_entry.grid(row=16, column=1)

# Feature
feature4_label = tk.Label(window, text="Feature4:", bg="light blue")
feature4_label.grid(row=17, column=0)
feature4_entry = tk.Entry(window, bg="light blue")
feature4_entry.grid(row=17, column=1)

# Feature
feature5_label = tk.Label(window, text="Feature5:", bg="light blue")
feature5_label.grid(row=18, column=0)
feature5_entry = tk.Entry(window, bg="light blue")
feature5_entry.grid(row=18, column=1)

predict_button = tk.Button(window, text="Predict", command=predict)
predict_button.grid(row=21, columnspan=3)

# Create a label to show prediction result
prediction_label = tk.Label(window, text="", bg="light blue")
prediction_label.grid(row=25, columnspan=3)

In [44]:
# Run the GUI
window.mainloop()