In [10]:
import pandas as pd


In [11]:
# Read the feature-engineering CSV into the working DataFrame `data`.
data = pd.read_csv(filepath_or_buffer='/content/feature_engineering-1.csv')

In [12]:
# Show the column labels of the freshly loaded frame.
data.columns

Index(['property_type', 'society', 'sector', 'price', 'price_per_sqft', 'area',
       'areawithtype', 'bedroom', 'bathroom', 'balcony', 'additionalroom',
       'floornum', 'facing', 'agepossession', 'nearbylocations',
       'furnishdetails', 'features', 'super_built_up_area', 'built_up_area',
       'carpet_area', 'study_room', 'store_room', 'others_room', 'pooja_room',
       'na_room', 'servant_room', 'furnishing_type'],
      dtype='object')

In [14]:
from sklearn.preprocessing import MultiLabelBinarizer
import ast
# Parse the 'features' column into real Python lists. Each usable cell is
# expected to hold the string repr of a list (e.g. "['Park', 'Library']").
# Missing values, non-string values and strings that do not start with '['
# all map to an empty list, so every row yields an iterable of labels.
data['features_list'] = data['features'].apply(
    lambda x: ast.literal_eval(x) if isinstance(x, str) and x.startswith('[') else []
)

# One-hot encode the label lists: one column per distinct label, holding 1 when
# the row's list contains that label and 0 otherwise.
mlb = MultiLabelBinarizer()
features_binary_matrix = mlb.fit_transform(data['features_list'])

# Wrap the matrix in a DataFrame that carries `data`'s own index. Values derived
# from this frame are later assigned back onto `data`, and pandas aligns such
# assignments by index label rather than by position; sharing the index keeps
# the rows matched even if `data` does not have a default 0..n-1 index.
features_binary_df = pd.DataFrame(
    features_binary_matrix,
    columns=mlb.classes_,
    index=data.index,
)

In [15]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
# Fit k-means on the binary feature matrix for every cluster count from 1 to 10
# and record each fitted model's inertia (within-cluster sum of squares).
# NOTE(review): presumably gathered for an elbow plot; the values are not used
# by any later cell visible in this notebook.
wcss_reduced = []

for n_clusters in range(1, 11):
    kmeans = KMeans(n_clusters=n_clusters, init='k-means++', random_state=42)
    # fit() returns the fitted estimator, so its inertia can be read directly.
    wcss_reduced.append(kmeans.fit(features_binary_df).inertia_)



In [19]:
# Weight for each amenity label. A row's luxury score is the sum of the weights
# of the amenities flagged for that row, so a larger weight contributes more.
weights = {
    '24/7 Power Backup': 8,
    '24/7 Water Supply': 4,
    '24x7 Security': 7,
    'ATM': 4,
    'Aerobics Centre': 6,
    'Airy Rooms': 8,
    'Amphitheatre': 7,
    'Badminton Court': 7,
    'Banquet Hall': 8,
    'Bar/Chill-Out Lounge': 9,
    'Barbecue': 7,
    'Basketball Court': 7,
    'Billiards': 7,
    'Bowling Alley': 8,
    'Business Lounge': 9,
    'CCTV Camera Security': 8,
    'Cafeteria': 6,
    'Car Parking': 6,
    'Card Room': 6,
    'Centrally Air Conditioned': 9,
    'Changing Area': 6,
    "Children's Play Area": 7,
    'Cigar Lounge': 9,
    'Clinic': 5,
    'Club House': 9,
    'Concierge Service': 9,
    'Conference room': 8,
    'Creche/Day care': 7,
    'Cricket Pitch': 7,
    'Doctor on Call': 6,
    'Earthquake Resistant': 5,
    'Entrance Lobby': 7,
    'False Ceiling Lighting': 6,
    'Feng Shui / Vaastu Compliant': 5,
    'Fire Fighting Systems': 8,
    'Fitness Centre / GYM': 8,
    'Flower Garden': 7,
    'Food Court': 6,
    'Foosball': 5,
    'Football': 7,
    'Fountain': 7,
    'Gated Community': 7,
    'Golf Course': 10,
    'Grocery Shop': 6,
    'Gymnasium': 8,
    'High Ceiling Height': 8,
    'High Speed Elevators': 8,
    'Infinity Pool': 9,
    'Intercom Facility': 7,
    'Internal Street Lights': 6,
    'Internet/wi-fi connectivity': 7,
    'Jacuzzi': 9,
    'Jogging Track': 7,
    'Landscape Garden': 8,
    'Laundry': 6,
    'Lawn Tennis Court': 8,
    'Library': 8,
    'Lounge': 8,
    'Low Density Society': 7,
    'Maintenance Staff': 6,
    'Manicured Garden': 7,
    'Medical Centre': 5,
    'Milk Booth': 4,
    'Mini Theatre': 9,
    'Multipurpose Court': 7,
    'Multipurpose Hall': 7,
    'Natural Light': 8,
    'Natural Pond': 7,
    'Park': 8,
    'Party Lawn': 8,
    'Piped Gas': 7,
    'Pool Table': 7,
    'Power Back up Lift': 8,
    'Private Garden / Terrace': 9,
    'Property Staff': 7,
    'RO System': 7,
    'Rain Water Harvesting': 7,
    'Reading Lounge': 8,
    'Restaurant': 8,
    'Salon': 8,
    'Sauna': 9,
    'Security / Fire Alarm': 9,
    'Security Personnel': 9,
    'Separate entry for servant room': 8,
    'Sewage Treatment Plant': 6,
    'Shopping Centre': 7,
    'Skating Rink': 7,
    'Solar Lighting': 6,
    'Solar Water Heating': 7,
    'Spa': 9,
    'Spacious Interiors': 9,
    'Squash Court': 8,
    'Steam Room': 9,
    'Sun Deck': 8,
    'Swimming Pool': 8,
    'Temple': 5,
    'Theatre': 9,
    'Toddler Pool': 7,
    'Valet Parking': 9,
    'Video Door Security': 9,
    'Visitor Parking': 7,
    'Water Softener Plant': 7,
    'Water Storage': 7,
    'Water purifier': 7,
    'Yoga/Meditation Area': 7
}

# Weighted amenities that have no column in the binary frame get an all-zero
# column so the column selection below cannot raise a KeyError.
missing_cols = set(weights.keys()) - set(features_binary_df.columns)
# Iterate in sorted order: a set of strings has no stable iteration order
# between interpreter sessions, which would make the resulting column order of
# features_binary_df differ from run to run.
for col in sorted(missing_cols):
    features_binary_df[col] = 0

# Calculate luxury score: scale each amenity's 0/1 indicator column by its
# weight (the Series is aligned to the columns by label), then total per row.
luxury_score = (
    features_binary_df[list(weights.keys())]
    .multiply(pd.Series(weights))
    .sum(axis=1)
)

# luxury_score now holds one weighted amenity total per row; it is attached to `data` and previewed in the cells below.


In [21]:
# Attach the per-row luxury score to the main frame as a new column
# (pandas matches the Series to `data` by index label).
data['luxury_score'] = luxury_score

In [22]:
# Confirm that 'features_list' and 'luxury_score' were added to the frame.
data.columns

Index(['property_type', 'society', 'sector', 'price', 'price_per_sqft', 'area',
       'areawithtype', 'bedroom', 'bathroom', 'balcony', 'additionalroom',
       'floornum', 'facing', 'agepossession', 'nearbylocations',
       'furnishdetails', 'features', 'super_built_up_area', 'built_up_area',
       'carpet_area', 'study_room', 'store_room', 'others_room', 'pooja_room',
       'na_room', 'servant_room', 'furnishing_type', 'features_list',
       'luxury_score'],
      dtype='object')

In [23]:
# Spot-check three randomly drawn rows: property type alongside its luxury score.
data.loc[:, ['property_type', 'luxury_score']].sample(n=3)

Unnamed: 0,property_type,luxury_score
983,flat,72
3435,houses,0
1204,flat,79


In [27]:

# Remove five columns from `data` in place: the raw 'features' text and its
# parsed 'features_list' (both now condensed into 'luxury_score'), together with
# 'nearbylocations', 'furnishdetails' and 'additionalroom'.
data.drop(
    labels=[
        'nearbylocations',
        'furnishdetails',
        'features',
        'features_list',
        'additionalroom',
    ],
    axis='columns',
    inplace=True,
)

In [28]:
# Verify the remaining columns after the drop.
data.columns

Index(['property_type', 'society', 'sector', 'price', 'price_per_sqft', 'area',
       'areawithtype', 'bedroom', 'bathroom', 'balcony', 'floornum', 'facing',
       'agepossession', 'super_built_up_area', 'built_up_area', 'carpet_area',
       'study_room', 'store_room', 'others_room', 'pooja_room', 'na_room',
       'servant_room', 'furnishing_type', 'luxury_score'],
      dtype='object')

In [29]:
# Write the final frame to CSV, leaving the row index out of the file.
data.to_csv(path_or_buf='/content/ddd.csv', index=False)