In [5]:
import pandas as pd
import numpy as np
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Bengaluru house-price regression with SVR.
#
# Pipeline: load CSV -> parse/encode features -> train/test split ->
# impute + scale using training statistics only -> fit one SVR per kernel
# and print MSE / RMSE / R-squared / MAE on the held-out split.


def parse_total_sqft(value):
    """Convert one raw ``total_sqft`` entry to a float.

    The column is read as ``object`` dtype, so at least some entries are not
    plain numbers.  Handled forms:

    * a number, or a string holding a single number -> that number
    * two numbers separated by ``-`` (treated as a range) -> their midpoint
    * anything else (missing values, text that is not a number) -> ``NaN``

    NOTE(review): the range/unit formats are assumed from the object dtype;
    verify against the actual data.
    """
    if isinstance(value, (int, float, np.integer, np.floating)):
        return float(value)  # already numeric; NaN stays NaN
    try:
        numbers = [float(part) for part in str(value).split('-')]
    except ValueError:
        return np.nan
    if len(numbers) in (1, 2):
        return sum(numbers) / len(numbers)
    return np.nan


# The guard keeps notebook/script behaviour unchanged (``__name__`` is
# "__main__" there) while letting ``parse_total_sqft`` be imported without
# triggering file I/O and model training.
if __name__ == "__main__":
    # Load dataset (adjust the path to wherever the CSV lives)
    file_path = '/content/Bengaluru_House_Data.csv'
    df = pd.read_csv(file_path)

    # Inspect the first rows and the column dtypes of the raw data
    print(df.head())
    print(df.dtypes)

    # total_sqft is a quantity, not a category: parse it to float instead of
    # letting it fall into the label-encoding loop below, which would replace
    # its magnitude with arbitrary category codes.
    df['total_sqft'] = df['total_sqft'].map(parse_total_sqft)

    # Rows without a target cannot be used for training or evaluation;
    # imputing the target would fabricate labels.
    df = df.dropna(subset=['price']).copy()

    # Encode the remaining text columns as integer codes.  astype(str) turns
    # missing values into the string "nan", so "missing" becomes its own
    # category rather than being imputed later.
    categorical_columns = df.select_dtypes(include=['object']).columns
    label_encoder = LabelEncoder()
    for column in categorical_columns:
        df[column] = label_encoder.fit_transform(df[column].astype(str))

    # Separate features from the target column 'price'
    X = df.drop('price', axis=1)
    y = df['price']

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Fill missing numeric values with the column mean.  The means come from
    # the training split only so no test-set information leaks into the model.
    train_means = X_train.mean()
    X_train = X_train.fillna(train_means)
    X_test = X_test.fillna(train_means)

    # Standardize the features (scaler is fitted on the training split only)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Train SVM regression models with different kernels
    kernels = ['linear', 'poly', 'rbf']
    # NOTE: despite its name, this maps kernel name -> test-set predictions
    # (not the fitted estimator); the name is kept for later cells.
    svm_models = {}

    for kernel in kernels:
        model = SVR(kernel=kernel)
        model.fit(X_train_scaled, y_train)
        y_pred = model.predict(X_test_scaled)
        svm_models[kernel] = y_pred

        # Calculate MSE, RMSE, R-squared and MAE for each model
        mse = mean_squared_error(y_test, y_pred)
        rmse = np.sqrt(mse)
        r2 = r2_score(y_test, y_pred)
        mae = mean_absolute_error(y_test, y_pred)

        print(f"Kernel: {kernel}")
        print(f"Mean Squared Error (MSE): {mse}")
        print(f"Root Mean Squared Error (RMSE): {rmse}")
        print(f"R-squared: {r2}")
        print(f"Mean Absolute Error (MAE): {mae}")
        print("-" * 50)


              area_type   availability                  location       size  \
0  Super built-up  Area         19-Dec  Electronic City Phase II      2 BHK   
1            Plot  Area  Ready To Move          Chikka Tirupathi  4 Bedroom   
2        Built-up  Area  Ready To Move               Uttarahalli      3 BHK   
3  Super built-up  Area  Ready To Move        Lingadheeranahalli      3 BHK   
4  Super built-up  Area  Ready To Move                  Kothanur      2 BHK   

   society total_sqft  bath  balcony   price  
0  Coomee        1056   2.0      1.0   39.07  
1  Theanmp       2600   5.0      3.0  120.00  
2      NaN       1440   2.0      3.0   62.00  
3  Soiewre       1521   3.0      1.0   95.00  
4      NaN       1200   2.0      1.0   51.00  
area_type        object
availability     object
location         object
size             object
society          object
total_sqft       object
bath            float64
balcony         float64
price           float64
dtype: object
Kernel: linea

# New Section