In [97]:
import pandas as pd
# 1. Read CSV file
df = pd.read_csv("House_analysis_data.csv")
# 2. Number of features (columns)
print("Number of Columns (Features):", df.shape[1])
# 3. Number of observations (rows)
print("Number of Rows (Observations):", df.shape[0])
# 4. Data type of each feature
print("\nData Types of Each Column:\n")
print(df.dtypes)
# 5. Missing values in each column
print("\nMissing Values in Each Column:\n")
print(df.isnull().sum())


Number of Columns (Features): 14
Number of Rows (Observations): 400

Data Types of Each Column:

Title              object
Locality           object
Furnishing         object
Built_up_area       int64
Lease_type         object
Available_from     object
Price              object
Bathrooms           int64
Balcony           float64
Carpet_area       float64
Gas_Pipeline       object
Gate_Community     object
Floor_number      float64
Total_floors      float64
dtype: object

Missing Values in Each Column:

Title              0
Locality           0
Furnishing         0
Built_up_area      0
Lease_type         2
Available_from     0
Price              0
Bathrooms          0
Balcony           79
Carpet_area       75
Gas_Pipeline       0
Gate_Community     0
Floor_number      43
Total_floors      43
dtype: int64


In [2]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 14 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Title           400 non-null    object 
 1   Locality        400 non-null    object 
 2   Furnishing      400 non-null    object 
 3   Built_up_area   400 non-null    int64  
 4   Lease_type      398 non-null    object 
 5   Available_from  400 non-null    object 
 6   Price           400 non-null    object 
 7   Bathrooms       400 non-null    int64  
 8   Balcony         321 non-null    float64
 9   Carpet_area     325 non-null    float64
 10  Gas_Pipeline    400 non-null    object 
 11  Gate_Community  400 non-null    object 
 12  Floor_number    357 non-null    float64
 13  Total_floors    357 non-null    float64
dtypes: float64(4), int64(2), object(8)
memory usage: 43.9+ KB


In [3]:
df.head()


Unnamed: 0,Title,Locality,Furnishing,Built_up_area,Lease_type,Available_from,Price,Bathrooms,Balcony,Carpet_area,Gas_Pipeline,Gate_Community,Floor_number,Total_floors
0,2.5 BHK Flat for Rent,"Tirumanahalli, Bangalore",Semi Furnished,1265,Family / Bachelor / Company,Available now,42006,2,1.0,795.0,Yes,Yes,3.0,24.0
1,1 BHK Flat for Rent,"BTM Layout, Bangalore",Fully Furnished,700,Family / Bachelor / Company,Available now,28500,1,,600.0,Yes,Yes,5.0,7.0
2,2 BHK Flat for Rent,"Mahadevapura, Bangalore",Semi Furnished,1185,Family / Bachelor / Company,"Nov, 2025",38000,2,1.0,850.0,No,No,3.0,4.0
3,3 BHK Flat for Rent,"Gunjur Village, Bangalore",Semi Furnished,1655,Family,Available now,65000,3,3.0,1304.0,No,Yes,26.0,30.0
4,1 RK Independent House for Rent,"Mahadevapura, Bangalore",Fully Furnished,400,Family / Bachelor,Available now,18000,1,1.0,,No,No,,


In [98]:
import pandas as pd
import numpy as np
import re

# Load CSV
df = pd.read_csv("House_analysis_data.csv")

# -------------------------------
# 1. REMOVE SPECIAL CHARACTERS
# -------------------------------

# Price – remove commas, spaces, currency symbols
df["Price"] = (
    df["Price"].astype(str)
    .str.replace(",", "")
    .str.replace(r"[^\d.]", "", regex=True)
)

# Built-up Area – extract numbers only
df["Built_up_area"] = (
    df["Built_up_area"]
    .astype(str)
    .str.extract(r"(\d+\.?\d*)")
)

# Carpet Area – extract only numeric part
df["Carpet_area"] = (
    df["Carpet_area"]
    .astype(str)
    .str.replace(",", "")
    .str.extract(r"(\d+\.?\d*)")
)

# Floor Number – extract floor number
df["Floor_number"] = (
    df["Floor_number"]
    .astype(str)
    .str.extract(r"(\d+)")
)

# Total Floors
df["Total_floors"] = (
    df["Total_floors"]
    .astype(str)
    .str.extract(r"(\d+)")
)


# # -------------------------------



# # -------------------------------
# # 6. DISPLAY CLEANED DATA
# # -------------------------------

# print("\nFinal Data Types:\n", df.dtypes)
# print("\nRemaining Missing Values:\n", df.isnull().sum())
# print("\nSample Cleaned Data:\n", df.head())

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 14 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Title           400 non-null    object 
 1   Locality        400 non-null    object 
 2   Furnishing      400 non-null    object 
 3   Built_up_area   400 non-null    object 
 4   Lease_type      398 non-null    object 
 5   Available_from  400 non-null    object 
 6   Price           400 non-null    object 
 7   Bathrooms       400 non-null    int64  
 8   Balcony         321 non-null    float64
 9   Carpet_area     325 non-null    object 
 10  Gas_Pipeline    400 non-null    object 
 11  Gate_Community  400 non-null    object 
 12  Floor_number    357 non-null    object 
 13  Total_floors    357 non-null    object 
dtypes: float64(1), int64(1), object(12)
memory usage: 43.9+ KB


In [99]:
df.head()

Unnamed: 0,Title,Locality,Furnishing,Built_up_area,Lease_type,Available_from,Price,Bathrooms,Balcony,Carpet_area,Gas_Pipeline,Gate_Community,Floor_number,Total_floors
0,2.5 BHK Flat for Rent,"Tirumanahalli, Bangalore",Semi Furnished,1265,Family / Bachelor / Company,Available now,42006,2,1.0,795.0,Yes,Yes,3.0,24.0
1,1 BHK Flat for Rent,"BTM Layout, Bangalore",Fully Furnished,700,Family / Bachelor / Company,Available now,28500,1,,600.0,Yes,Yes,5.0,7.0
2,2 BHK Flat for Rent,"Mahadevapura, Bangalore",Semi Furnished,1185,Family / Bachelor / Company,"Nov, 2025",38000,2,1.0,850.0,No,No,3.0,4.0
3,3 BHK Flat for Rent,"Gunjur Village, Bangalore",Semi Furnished,1655,Family,Available now,65000,3,3.0,1304.0,No,Yes,26.0,30.0
4,1 RK Independent House for Rent,"Mahadevapura, Bangalore",Fully Furnished,400,Family / Bachelor,Available now,18000,1,1.0,,No,No,,


In [100]:
print(df.dtypes)

Title              object
Locality           object
Furnishing         object
Built_up_area      object
Lease_type         object
Available_from     object
Price              object
Bathrooms           int64
Balcony           float64
Carpet_area        object
Gas_Pipeline       object
Gate_Community     object
Floor_number       object
Total_floors       object
dtype: object


In [101]:
# 2. FIX INCORRECT FORMATS / INVALID VALUES
# -------------------------------

# Replace empty strings or 'nan' with NaN
df.replace(["", " ", "nan", "None", None], np.nan, inplace=True)


In [102]:
df.head()

Unnamed: 0,Title,Locality,Furnishing,Built_up_area,Lease_type,Available_from,Price,Bathrooms,Balcony,Carpet_area,Gas_Pipeline,Gate_Community,Floor_number,Total_floors
0,2.5 BHK Flat for Rent,"Tirumanahalli, Bangalore",Semi Furnished,1265,Family / Bachelor / Company,Available now,42006,2,1.0,795.0,Yes,Yes,3.0,24.0
1,1 BHK Flat for Rent,"BTM Layout, Bangalore",Fully Furnished,700,Family / Bachelor / Company,Available now,28500,1,,600.0,Yes,Yes,5.0,7.0
2,2 BHK Flat for Rent,"Mahadevapura, Bangalore",Semi Furnished,1185,Family / Bachelor / Company,"Nov, 2025",38000,2,1.0,850.0,No,No,3.0,4.0
3,3 BHK Flat for Rent,"Gunjur Village, Bangalore",Semi Furnished,1655,Family,Available now,65000,3,3.0,1304.0,No,Yes,26.0,30.0
4,1 RK Independent House for Rent,"Mahadevapura, Bangalore",Fully Furnished,400,Family / Bachelor,Available now,18000,1,1.0,,No,No,,


In [103]:
# convert datatypes
numeric_cols = [
    "Price", "Built_up_area", "Carpet_area",
    "Bathrooms", "Balcony", "Floor_number", "Total_floors"
]

for col in numeric_cols:
    df[col] = pd.to_numeric(df[col], errors='coerce')
df.head()

Unnamed: 0,Title,Locality,Furnishing,Built_up_area,Lease_type,Available_from,Price,Bathrooms,Balcony,Carpet_area,Gas_Pipeline,Gate_Community,Floor_number,Total_floors
0,2.5 BHK Flat for Rent,"Tirumanahalli, Bangalore",Semi Furnished,1265,Family / Bachelor / Company,Available now,42006,2,1.0,795.0,Yes,Yes,3.0,24.0
1,1 BHK Flat for Rent,"BTM Layout, Bangalore",Fully Furnished,700,Family / Bachelor / Company,Available now,28500,1,,600.0,Yes,Yes,5.0,7.0
2,2 BHK Flat for Rent,"Mahadevapura, Bangalore",Semi Furnished,1185,Family / Bachelor / Company,"Nov, 2025",38000,2,1.0,850.0,No,No,3.0,4.0
3,3 BHK Flat for Rent,"Gunjur Village, Bangalore",Semi Furnished,1655,Family,Available now,65000,3,3.0,1304.0,No,Yes,26.0,30.0
4,1 RK Independent House for Rent,"Mahadevapura, Bangalore",Fully Furnished,400,Family / Bachelor,Available now,18000,1,1.0,,No,No,,


In [104]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 14 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Title           400 non-null    object 
 1   Locality        400 non-null    object 
 2   Furnishing      400 non-null    object 
 3   Built_up_area   400 non-null    int64  
 4   Lease_type      398 non-null    object 
 5   Available_from  400 non-null    object 
 6   Price           400 non-null    int64  
 7   Bathrooms       400 non-null    int64  
 8   Balcony         321 non-null    float64
 9   Carpet_area     325 non-null    float64
 10  Gas_Pipeline    400 non-null    object 
 11  Gate_Community  400 non-null    object 
 12  Floor_number    357 non-null    float64
 13  Total_floors    357 non-null    float64
dtypes: float64(4), int64(3), object(7)
memory usage: 43.9+ KB


In [105]:
print(df.isnull().sum())

Title              0
Locality           0
Furnishing         0
Built_up_area      0
Lease_type         2
Available_from     0
Price              0
Bathrooms          0
Balcony           79
Carpet_area       75
Gas_Pipeline       0
Gate_Community     0
Floor_number      43
Total_floors      43
dtype: int64


In [106]:
# 4. HANDLE MISSING VALUES
# Numeric columns – fill missing with median
for col in numeric_cols:
    if df[col].isnull().sum() > 0:
        df[col] = df[col].fillna(df[col].median())
# Categorical – fill missing with mode
for col in ["Lease_type"]:
    if df[col].isnull().sum() > 0:
        df[col] = df[col].fillna(df[col].mode()[0])
df.head()

Unnamed: 0,Title,Locality,Furnishing,Built_up_area,Lease_type,Available_from,Price,Bathrooms,Balcony,Carpet_area,Gas_Pipeline,Gate_Community,Floor_number,Total_floors
0,2.5 BHK Flat for Rent,"Tirumanahalli, Bangalore",Semi Furnished,1265,Family / Bachelor / Company,Available now,42006,2,1.0,795.0,Yes,Yes,3.0,24.0
1,1 BHK Flat for Rent,"BTM Layout, Bangalore",Fully Furnished,700,Family / Bachelor / Company,Available now,28500,1,1.0,600.0,Yes,Yes,5.0,7.0
2,2 BHK Flat for Rent,"Mahadevapura, Bangalore",Semi Furnished,1185,Family / Bachelor / Company,"Nov, 2025",38000,2,1.0,850.0,No,No,3.0,4.0
3,3 BHK Flat for Rent,"Gunjur Village, Bangalore",Semi Furnished,1655,Family,Available now,65000,3,3.0,1304.0,No,Yes,26.0,30.0
4,1 RK Independent House for Rent,"Mahadevapura, Bangalore",Fully Furnished,400,Family / Bachelor,Available now,18000,1,1.0,850.0,No,No,3.0,6.0


In [107]:
# 6. DISPLAY CLEANED DATA
# -------------------------------

print("\nFinal Data Types:\n", df.dtypes)
print("\nRemaining Missing Values:\n", df.isnull().sum())
print("\nSample Cleaned Data:\n", df.head())



Final Data Types:
 Title              object
Locality           object
Furnishing         object
Built_up_area       int64
Lease_type         object
Available_from     object
Price               int64
Bathrooms           int64
Balcony           float64
Carpet_area       float64
Gas_Pipeline       object
Gate_Community     object
Floor_number      float64
Total_floors      float64
dtype: object

Remaining Missing Values:
 Title             0
Locality          0
Furnishing        0
Built_up_area     0
Lease_type        0
Available_from    0
Price             0
Bathrooms         0
Balcony           0
Carpet_area       0
Gas_Pipeline      0
Gate_Community    0
Floor_number      0
Total_floors      0
dtype: int64

Sample Cleaned Data:
                              Title                   Locality  \
0            2.5 BHK Flat for Rent   Tirumanahalli, Bangalore   
1              1 BHK Flat for Rent      BTM Layout, Bangalore   
2              2 BHK Flat for Rent    Mahadevapura, Bangalore  

In [108]:
df.head()

Unnamed: 0,Title,Locality,Furnishing,Built_up_area,Lease_type,Available_from,Price,Bathrooms,Balcony,Carpet_area,Gas_Pipeline,Gate_Community,Floor_number,Total_floors
0,2.5 BHK Flat for Rent,"Tirumanahalli, Bangalore",Semi Furnished,1265,Family / Bachelor / Company,Available now,42006,2,1.0,795.0,Yes,Yes,3.0,24.0
1,1 BHK Flat for Rent,"BTM Layout, Bangalore",Fully Furnished,700,Family / Bachelor / Company,Available now,28500,1,1.0,600.0,Yes,Yes,5.0,7.0
2,2 BHK Flat for Rent,"Mahadevapura, Bangalore",Semi Furnished,1185,Family / Bachelor / Company,"Nov, 2025",38000,2,1.0,850.0,No,No,3.0,4.0
3,3 BHK Flat for Rent,"Gunjur Village, Bangalore",Semi Furnished,1655,Family,Available now,65000,3,3.0,1304.0,No,Yes,26.0,30.0
4,1 RK Independent House for Rent,"Mahadevapura, Bangalore",Fully Furnished,400,Family / Bachelor,Available now,18000,1,1.0,850.0,No,No,3.0,6.0


In [109]:
df.head()

Unnamed: 0,Title,Locality,Furnishing,Built_up_area,Lease_type,Available_from,Price,Bathrooms,Balcony,Carpet_area,Gas_Pipeline,Gate_Community,Floor_number,Total_floors
0,2.5 BHK Flat for Rent,"Tirumanahalli, Bangalore",Semi Furnished,1265,Family / Bachelor / Company,Available now,42006,2,1.0,795.0,Yes,Yes,3.0,24.0
1,1 BHK Flat for Rent,"BTM Layout, Bangalore",Fully Furnished,700,Family / Bachelor / Company,Available now,28500,1,1.0,600.0,Yes,Yes,5.0,7.0
2,2 BHK Flat for Rent,"Mahadevapura, Bangalore",Semi Furnished,1185,Family / Bachelor / Company,"Nov, 2025",38000,2,1.0,850.0,No,No,3.0,4.0
3,3 BHK Flat for Rent,"Gunjur Village, Bangalore",Semi Furnished,1655,Family,Available now,65000,3,3.0,1304.0,No,Yes,26.0,30.0
4,1 RK Independent House for Rent,"Mahadevapura, Bangalore",Fully Furnished,400,Family / Bachelor,Available now,18000,1,1.0,850.0,No,No,3.0,6.0


In [110]:
df.Title

0                 2.5 BHK Flat for Rent
1                   1 BHK Flat for Rent
2                   2 BHK Flat for Rent
3                   3 BHK Flat for Rent
4       1 RK Independent House for Rent
                     ...               
395                 1 BHK Flat for Rent
396                 1 BHK Flat for Rent
397                 1 BHK Flat for Rent
398                 4 BHK Flat for Rent
399    3 BHK Independent House for Rent
Name: Title, Length: 400, dtype: object

In [111]:

df["BHK_Number"] = df["Title"].str.extract(r"(\d+\.?\d*)\s*BHK", expand=False)

# ---- Remove 'X BHK' and get property type ----
df["Property_Title"] = (
    df["Title"]
    .str.replace(r"\d+\.?\d*\s*BHK\s*", "", regex=True)   # remove "X BHK"
    .str.replace("for Rent", "", regex=False)             # remove "for Rent"
    .str.strip()
)

df.head()


Unnamed: 0,Title,Locality,Furnishing,Built_up_area,Lease_type,Available_from,Price,Bathrooms,Balcony,Carpet_area,Gas_Pipeline,Gate_Community,Floor_number,Total_floors,BHK_Number,Property_Title
0,2.5 BHK Flat for Rent,"Tirumanahalli, Bangalore",Semi Furnished,1265,Family / Bachelor / Company,Available now,42006,2,1.0,795.0,Yes,Yes,3.0,24.0,2.5,Flat
1,1 BHK Flat for Rent,"BTM Layout, Bangalore",Fully Furnished,700,Family / Bachelor / Company,Available now,28500,1,1.0,600.0,Yes,Yes,5.0,7.0,1.0,Flat
2,2 BHK Flat for Rent,"Mahadevapura, Bangalore",Semi Furnished,1185,Family / Bachelor / Company,"Nov, 2025",38000,2,1.0,850.0,No,No,3.0,4.0,2.0,Flat
3,3 BHK Flat for Rent,"Gunjur Village, Bangalore",Semi Furnished,1655,Family,Available now,65000,3,3.0,1304.0,No,Yes,26.0,30.0,3.0,Flat
4,1 RK Independent House for Rent,"Mahadevapura, Bangalore",Fully Furnished,400,Family / Bachelor,Available now,18000,1,1.0,850.0,No,No,3.0,6.0,,1 RK Independent House


In [112]:
print(df.isnull().sum())

Title             0
Locality          0
Furnishing        0
Built_up_area     0
Lease_type        0
Available_from    0
Price             0
Bathrooms         0
Balcony           0
Carpet_area       0
Gas_Pipeline      0
Gate_Community    0
Floor_number      0
Total_floors      0
BHK_Number        6
Property_Title    0
dtype: int64


In [113]:
df[df["BHK_Number"].isnull()][["Title"]]


Unnamed: 0,Title
4,1 RK Independent House for Rent
22,1 RK Studio for Rent
79,1 RK Studio for Rent
126,1 RK Flat for Rent
163,1 RK Flat for Rent
364,1 RK Independent Builder Floor for Rent


In [114]:
df["BHK_Number"] = df["Title"].str.extract(r"(\d+\.?\d*)\s*(BHK|RK)", expand=False)[0]

# ---- Clean Property_Title (remove BHK/RK and 'for Rent') ----
df["Property_Title"] = (
    df["Title"]
    .str.replace(r"\d+\.?\d*\s*(BHK|RK)\s*", "", regex=True)   # remove 'X BHK' or 'X RK'
    .str.replace("for Rent", "", regex=False)                  # remove 'for Rent'
    .str.strip()
)


In [115]:
df

Unnamed: 0,Title,Locality,Furnishing,Built_up_area,Lease_type,Available_from,Price,Bathrooms,Balcony,Carpet_area,Gas_Pipeline,Gate_Community,Floor_number,Total_floors,BHK_Number,Property_Title
0,2.5 BHK Flat for Rent,"Tirumanahalli, Bangalore",Semi Furnished,1265,Family / Bachelor / Company,Available now,42006,2,1.0,795.0,Yes,Yes,3.0,24.0,2.5,Flat
1,1 BHK Flat for Rent,"BTM Layout, Bangalore",Fully Furnished,700,Family / Bachelor / Company,Available now,28500,1,1.0,600.0,Yes,Yes,5.0,7.0,1,Flat
2,2 BHK Flat for Rent,"Mahadevapura, Bangalore",Semi Furnished,1185,Family / Bachelor / Company,"Nov, 2025",38000,2,1.0,850.0,No,No,3.0,4.0,2,Flat
3,3 BHK Flat for Rent,"Gunjur Village, Bangalore",Semi Furnished,1655,Family,Available now,65000,3,3.0,1304.0,No,Yes,26.0,30.0,3,Flat
4,1 RK Independent House for Rent,"Mahadevapura, Bangalore",Fully Furnished,400,Family / Bachelor,Available now,18000,1,1.0,850.0,No,No,3.0,6.0,1,Independent House
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,1 BHK Flat for Rent,"R.K. Hegde Nagar, Bangalore",Semi Furnished,750,Family / Bachelor / Company,Available now,18000,1,2.0,650.0,Yes,Yes,3.0,4.0,1,Flat
396,1 BHK Flat for Rent,"S.G. Palya, Bangalore",Fully Furnished,690,Family / Bachelor,Available now,29000,1,1.0,600.0,Yes,Yes,5.0,7.0,1,Flat
397,1 BHK Flat for Rent,"Kannuru, Bangalore",Fully Furnished,1074,Family / Bachelor / Company,Available now,150008,2,1.0,754.0,Yes,Yes,4.0,16.0,1,Flat
398,4 BHK Flat for Rent,"Jakkur, Bangalore",Semi Furnished,2020,Family / Company,Available now,90000,4,1.0,1980.0,No,Yes,7.0,19.0,4,Flat


In [48]:
df.Locality

0         Tirumanahalli, Bangalore
1            BTM Layout, Bangalore
2          Mahadevapura, Bangalore
3        Gunjur Village, Bangalore
4          Mahadevapura, Bangalore
                  ...             
395    R.K. Hegde Nagar, Bangalore
396          S.G. Palya, Bangalore
397             Kannuru, Bangalore
398              Jakkur, Bangalore
399        Mahadevapura, Bangalore
Name: Locality, Length: 400, dtype: object

In [116]:
df["Locality_area"] = df["Locality"].str.replace(", Bangalore", "", regex=False).str.strip()


In [117]:
df


Unnamed: 0,Title,Locality,Furnishing,Built_up_area,Lease_type,Available_from,Price,Bathrooms,Balcony,Carpet_area,Gas_Pipeline,Gate_Community,Floor_number,Total_floors,BHK_Number,Property_Title,Locality_area
0,2.5 BHK Flat for Rent,"Tirumanahalli, Bangalore",Semi Furnished,1265,Family / Bachelor / Company,Available now,42006,2,1.0,795.0,Yes,Yes,3.0,24.0,2.5,Flat,Tirumanahalli
1,1 BHK Flat for Rent,"BTM Layout, Bangalore",Fully Furnished,700,Family / Bachelor / Company,Available now,28500,1,1.0,600.0,Yes,Yes,5.0,7.0,1,Flat,BTM Layout
2,2 BHK Flat for Rent,"Mahadevapura, Bangalore",Semi Furnished,1185,Family / Bachelor / Company,"Nov, 2025",38000,2,1.0,850.0,No,No,3.0,4.0,2,Flat,Mahadevapura
3,3 BHK Flat for Rent,"Gunjur Village, Bangalore",Semi Furnished,1655,Family,Available now,65000,3,3.0,1304.0,No,Yes,26.0,30.0,3,Flat,Gunjur Village
4,1 RK Independent House for Rent,"Mahadevapura, Bangalore",Fully Furnished,400,Family / Bachelor,Available now,18000,1,1.0,850.0,No,No,3.0,6.0,1,Independent House,Mahadevapura
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,1 BHK Flat for Rent,"R.K. Hegde Nagar, Bangalore",Semi Furnished,750,Family / Bachelor / Company,Available now,18000,1,2.0,650.0,Yes,Yes,3.0,4.0,1,Flat,R.K. Hegde Nagar
396,1 BHK Flat for Rent,"S.G. Palya, Bangalore",Fully Furnished,690,Family / Bachelor,Available now,29000,1,1.0,600.0,Yes,Yes,5.0,7.0,1,Flat,S.G. Palya
397,1 BHK Flat for Rent,"Kannuru, Bangalore",Fully Furnished,1074,Family / Bachelor / Company,Available now,150008,2,1.0,754.0,Yes,Yes,4.0,16.0,1,Flat,Kannuru
398,4 BHK Flat for Rent,"Jakkur, Bangalore",Semi Furnished,2020,Family / Company,Available now,90000,4,1.0,1980.0,No,Yes,7.0,19.0,4,Flat,Jakkur


In [118]:
df.to_csv("House_analysis_data_is_cleaned.csv", index=False)

In [119]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 17 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Title           400 non-null    object 
 1   Locality        400 non-null    object 
 2   Furnishing      400 non-null    object 
 3   Built_up_area   400 non-null    int64  
 4   Lease_type      400 non-null    object 
 5   Available_from  400 non-null    object 
 6   Price           400 non-null    int64  
 7   Bathrooms       400 non-null    int64  
 8   Balcony         400 non-null    float64
 9   Carpet_area     400 non-null    float64
 10  Gas_Pipeline    400 non-null    object 
 11  Gate_Community  400 non-null    object 
 12  Floor_number    400 non-null    float64
 13  Total_floors    400 non-null    float64
 14  BHK_Number      400 non-null    object 
 15  Property_Title  400 non-null    object 
 16  Locality_area   400 non-null    object 
dtypes: float64(4), int64(3), object(10)

In [60]:
df["Property_Title"].astype("category")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 17 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Title           400 non-null    object 
 1   Locality        400 non-null    object 
 2   Furnishing      400 non-null    object 
 3   Built_up_area   400 non-null    int64  
 4   Lease_type      400 non-null    object 
 5   Available_from  400 non-null    object 
 6   Price           400 non-null    int64  
 7   Bathrooms       400 non-null    int64  
 8   Balcony         400 non-null    float64
 9   Carpet_area     400 non-null    float64
 10  Gas_Pipeline    400 non-null    object 
 11  Gate_Community  400 non-null    object 
 12  Floor_number    400 non-null    float64
 13  Total_floors    400 non-null    float64
 14  BHK_Number      400 non-null    object 
 15  Property_Title  400 non-null    object 
 16  Locality_area   400 non-null    object 
dtypes: float64(4), int64(3), object(10)

In [59]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 17 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Title           400 non-null    object 
 1   Locality        400 non-null    object 
 2   Furnishing      400 non-null    object 
 3   Built_up_area   400 non-null    int64  
 4   Lease_type      400 non-null    object 
 5   Available_from  400 non-null    object 
 6   Price           400 non-null    int64  
 7   Bathrooms       400 non-null    int64  
 8   Balcony         400 non-null    float64
 9   Carpet_area     400 non-null    float64
 10  Gas_Pipeline    400 non-null    object 
 11  Gate_Community  400 non-null    object 
 12  Floor_number    400 non-null    float64
 13  Total_floors    400 non-null    float64
 14  BHK_Number      400 non-null    object 
 15  Property_Title  400 non-null    object 
 16  Locality_area   400 non-null    object 
dtypes: float64(4), int64(3), object(10)

In [63]:
df.head(10)

Unnamed: 0,Title,Locality,Furnishing,Built_up_area,Lease_type,Available_from,Price,Bathrooms,Balcony,Carpet_area,Gas_Pipeline,Gate_Community,Floor_number,Total_floors,BHK_Number,Property_Title,Locality_area
0,2.5 BHK Flat for Rent,"Tirumanahalli, Bangalore",Semi Furnished,1265,Family / Bachelor / Company,Available now,42006,2,1.0,795.0,Yes,Yes,3.0,24.0,2.5,Flat,Tirumanahalli
1,1 BHK Flat for Rent,"BTM Layout, Bangalore",Fully Furnished,700,Family / Bachelor / Company,Available now,28500,1,1.0,600.0,Yes,Yes,5.0,7.0,1.0,Flat,BTM Layout
2,2 BHK Flat for Rent,"Mahadevapura, Bangalore",Semi Furnished,1185,Family / Bachelor / Company,"Nov, 2025",38000,2,1.0,850.0,No,No,3.0,4.0,2.0,Flat,Mahadevapura
3,3 BHK Flat for Rent,"Gunjur Village, Bangalore",Semi Furnished,1655,Family,Available now,65000,3,3.0,1304.0,No,Yes,26.0,30.0,3.0,Flat,Gunjur Village
4,1 RK Independent House for Rent,"Mahadevapura, Bangalore",Fully Furnished,400,Family / Bachelor,Available now,18000,1,1.0,850.0,No,No,3.0,6.0,1.0,Independent House,Mahadevapura
5,1 BHK Duplex for Rent,"Brookefield, Bangalore",Semi Furnished,700,Family / Bachelor,Available now,32000,1,1.0,650.0,No,No,1.0,6.0,1.0,Duplex,Brookefield
6,1 BHK Flat for Rent,"Thanisandra, Bangalore",Semi Furnished,690,Family / Bachelor / Company,Available now,18000,1,1.0,600.0,No,Yes,3.0,4.0,1.0,Flat,Thanisandra
7,4 BHK Penthouse for Rent,"Whitefield, Bangalore",Semi Furnished,4400,Family,Available now,160000,4,4.0,4000.0,No,Yes,18.0,18.0,4.0,Penthouse,Whitefield
8,4 BHK Independent House for Rent,"Cheemasandra, Bangalore",Unfurnished,4000,Family,"Jan, 2026",90000,5,2.0,3900.0,No,Yes,1.0,4.0,4.0,Independent House,Cheemasandra
9,3 BHK Flat for Rent,"Narayanapura, Bangalore",Semi Furnished,1350,Family / Bachelor / Company,Available now,50000,3,1.0,850.0,No,No,3.0,6.0,3.0,Flat,Narayanapura


In [66]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 17 columns):
 #   Column          Non-Null Count  Dtype   
---  ------          --------------  -----   
 0   Title           400 non-null    object  
 1   Locality        400 non-null    object  
 2   Furnishing      400 non-null    category
 3   Built_up_area   400 non-null    int64   
 4   Lease_type      400 non-null    object  
 5   Available_from  400 non-null    object  
 6   Price           400 non-null    int64   
 7   Bathrooms       400 non-null    int64   
 8   Balcony         400 non-null    float64 
 9   Carpet_area     400 non-null    float64 
 10  Gas_Pipeline    400 non-null    object  
 11  Gate_Community  400 non-null    object  
 12  Floor_number    400 non-null    float64 
 13  Total_floors    400 non-null    float64 
 14  BHK_Number      400 non-null    object  
 15  Property_Title  400 non-null    object  
 16  Locality_area   400 non-null    object  
dtypes: category(1), 

In [65]:
df["Furnishing"] = df["Furnishing"].astype("category")


In [86]:
df["Lease_type"] = df["Lease_type"].astype("category")
df["Gate_Community"] = df["Gate_Community"].map({"Yes": 1, "No": 0})
df["Gas_Pipeline"] = df["Gas_Pipeline"].map({"Yes": 1, "No": 0})
df["Available_from"] = df["Available_from"].replace(
    "Available now", pd.Timestamp.today().date()
)
df["Available_from"] = pd.to_datetime(df["Available_from"], errors="coerce")
df["Locality_area"] = df["Locality_area"].astype("category")

df["Property_Title"] = df["Property_Title"].astype("category")
# Convert BHK_Number from float to int
df["BHK_Number"] = df["BHK_Number"].astype("float")

# Verify the datatype
df["BHK_Number"].dtypes





dtype('float64')

In [81]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 17 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   Title           400 non-null    object        
 1   Locality        400 non-null    object        
 2   Furnishing      400 non-null    category      
 3   Built_up_area   400 non-null    int64         
 4   Lease_type      400 non-null    category      
 5   Available_from  400 non-null    datetime64[ns]
 6   Price           400 non-null    int64         
 7   Bathrooms       400 non-null    int64         
 8   Balcony         400 non-null    float64       
 9   Carpet_area     400 non-null    float64       
 10  Gas_Pipeline    0 non-null      float64       
 11  Gate_Community  0 non-null      float64       
 12  Floor_number    400 non-null    float64       
 13  Total_floors    400 non-null    float64       
 14  BHK_Number      400 non-null    float64       
 15  Proper

In [87]:
df

Unnamed: 0,Title,Locality,Furnishing,Built_up_area,Lease_type,Available_from,Price,Bathrooms,Balcony,Carpet_area,Gas_Pipeline,Gate_Community,Floor_number,Total_floors,BHK_Number,Property_Title,Locality_area
0,2.5 BHK Flat for Rent,"Tirumanahalli, Bangalore",Semi Furnished,1265,Family / Bachelor / Company,2025-11-24,42006,2,1.0,795.0,,,3.0,24.0,2.5,Flat,Tirumanahalli
1,1 BHK Flat for Rent,"BTM Layout, Bangalore",Fully Furnished,700,Family / Bachelor / Company,2025-11-24,28500,1,1.0,600.0,,,5.0,7.0,1.0,Flat,BTM Layout
2,2 BHK Flat for Rent,"Mahadevapura, Bangalore",Semi Furnished,1185,Family / Bachelor / Company,2025-11-01,38000,2,1.0,850.0,,,3.0,4.0,2.0,Flat,Mahadevapura
3,3 BHK Flat for Rent,"Gunjur Village, Bangalore",Semi Furnished,1655,Family,2025-11-24,65000,3,3.0,1304.0,,,26.0,30.0,3.0,Flat,Gunjur Village
4,1 RK Independent House for Rent,"Mahadevapura, Bangalore",Fully Furnished,400,Family / Bachelor,2025-11-24,18000,1,1.0,850.0,,,3.0,6.0,1.0,Independent House,Mahadevapura
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,1 BHK Flat for Rent,"R.K. Hegde Nagar, Bangalore",Semi Furnished,750,Family / Bachelor / Company,2025-11-24,18000,1,2.0,650.0,,,3.0,4.0,1.0,Flat,R.K. Hegde Nagar
396,1 BHK Flat for Rent,"S.G. Palya, Bangalore",Fully Furnished,690,Family / Bachelor,2025-11-24,29000,1,1.0,600.0,,,5.0,7.0,1.0,Flat,S.G. Palya
397,1 BHK Flat for Rent,"Kannuru, Bangalore",Fully Furnished,1074,Family / Bachelor / Company,2025-11-24,150008,2,1.0,754.0,,,4.0,16.0,1.0,Flat,Kannuru
398,4 BHK Flat for Rent,"Jakkur, Bangalore",Semi Furnished,2020,Family / Company,2025-11-24,90000,4,1.0,1980.0,,,7.0,19.0,4.0,Flat,Jakkur
