In [35]:
!pip install pyspark ipywidgets pandas scikit-learn gtts


Collecting gtts
  Downloading gTTS-2.5.1-py3-none-any.whl (29 kB)
Installing collected packages: gtts
Successfully installed gtts-2.5.1


In [36]:
import pyspark
from pyspark.sql import SparkSession
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
import numpy as np

# Create Spark session
spark = SparkSession.builder.appName("HOUSE_PRICE_PREDICTION").getOrCreate()

# Load the dataset using Spark
df_spark = spark.read.csv("/content/bengaluru_house_prices.csv", header=True, inferSchema=True)

# Convert Spark DataFrame to Pandas DataFrame
df = df_spark.toPandas()

# Data Cleaning and Preprocessing
def convert_sqft_to_num(x):
    if isinstance(x, str):
        tokens = x.split('-')
        if len(tokens) == 2:
            return (float(tokens[0]) + float(tokens[1])) / 2
        try:
            return float(x)
        except:
            return None
    return x

df['total_sqft'] = df['total_sqft'].apply(convert_sqft_to_num)
df = df.dropna(subset=['total_sqft', 'bath', 'balcony', 'price'])
df['bhk'] = df['size'].apply(lambda x: int(x.split(' ')[0]))

categorical_columns = ['area_type', 'location']
encoder = OneHotEncoder(sparse_output=False)
encoded_categorical_data = encoder.fit_transform(df[categorical_columns])
encoded_df = pd.DataFrame(encoded_categorical_data, columns=encoder.get_feature_names_out(categorical_columns))
df_final = pd.concat([df.reset_index(drop=True), encoded_df.reset_index(drop=True)], axis=1)
df_final = df_final.drop(columns=categorical_columns + ['size', 'society', 'availability'])
df_final = df_final.dropna()

# Features and target variable
X = df_final.drop(columns='price')
y = df_final['price']

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model Training
model = LinearRegression()
model.fit(X_train, y_train)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['bhk'] = df['size'].apply(lambda x: int(x.split(' ')[0]))


In [37]:
def predict_price(area_type, location, total_sqft, bath, balcony, bhk):
    input_df = pd.DataFrame([[area_type, location, total_sqft, bath, balcony, bhk]],
                            columns=['area_type', 'location', 'total_sqft', 'bath', 'balcony', 'bhk'])
    input_encoded = encoder.transform(input_df[['area_type', 'location']])
    input_data = pd.concat([input_df.reset_index(drop=True), pd.DataFrame(input_encoded, columns=encoder.get_feature_names_out())], axis=1)
    input_data = input_data.drop(columns=['area_type', 'location'])
    prediction = model.predict(input_data)[0]
    return prediction


In [41]:
import ipywidgets as widgets
from IPython.display import display
from gtts import gTTS
import IPython.display as ipd

# Define widgets
area_type_widget = widgets.Dropdown(options=df['area_type'].unique(), description='Area Type:')
location_widget = widgets.Dropdown(options=df['location'].unique(), description='Location:')
total_sqft_widget = widgets.FloatText(description='Total Sqft:')
bath_widget = widgets.IntText(description='Bath:')
balcony_widget = widgets.IntText(description='Balcony:')
bhk_widget = widgets.IntText(description='BHK:')
output_widget = widgets.Output()

def on_button_click(b):
    with output_widget:
        output_widget.clear_output()
        price = predict_price(area_type_widget.value, location_widget.value, total_sqft_widget.value, bath_widget.value, balcony_widget.value, bhk_widget.value)
        print(f"Predicted Price: {price:.2f} Lakhs")

        # Convert the predicted price to speech
        tts = gTTS(f"The predicted price is {price:.2f} Lakhs", lang='en')
        tts.save("predicted_price.mp3")

        # Automatically play the audio
        ipd.display(ipd.Audio("predicted_price.mp3", autoplay=True))

def on_reset_click(b):
    area_type_widget.value = None
    location_widget.value = None
    total_sqft_widget.value = None
    bath_widget.value = None
    balcony_widget.value = None
    bhk_widget.value = None
    output_widget.clear_output()

button_predict = widgets.Button(description='Predict Price')
button_predict.on_click(on_button_click)

button_reset = widgets.Button(description='Reset')
button_reset.on_click(on_reset_click)

# Display the widgets
display(widgets.HBox([area_type_widget, location_widget, total_sqft_widget, bath_widget, balcony_widget, bhk_widget]))
display(widgets.HBox([button_predict, button_reset]))
display(output_widget)


HBox(children=(Dropdown(description='Area Type:', options=('Super built-up  Area', 'Plot  Area', 'Built-up  Ar…

HBox(children=(Button(description='Predict Price', style=ButtonStyle()), Button(description='Reset', style=But…

Output()