In [1]:
import pandas as pd

In [3]:
app_att = pd.read_csv("../data_source/apartment_attributes.csv")
user_viewings = pd.read_csv("../data_source/user_viewings.csv")
app = pd.read_csv("../data_source/apartments.csv")

In [4]:
# Merge the dataframes
apartments = app_att.merge(app, on="id")

In [23]:
apartments.columns

Index(['id', 'category', 'body', 'amenities', 'bathrooms', 'bedrooms', 'fee',
       'has_photo', 'pets_allowed', 'price_display', 'price_type',
       'square_feet', 'address', 'cityname', 'state', 'latitude', 'longitude',
       'title', 'source', 'price', 'currency', 'listing_created_on',
       'is_active', 'last_modified_timestamp'],
      dtype='object')

In [24]:
user_viewings.columns

Index(['user_id', 'apartment_id', 'viewed_at', 'is_wishlisted',
       'call_to_action'],
      dtype='object')

In [7]:
apartments.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 24 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   id                       10000 non-null  int64  
 1   category                 10000 non-null  object 
 2   body                     10000 non-null  object 
 3   amenities                10000 non-null  object 
 4   bathrooms                10000 non-null  float64
 5   bedrooms                 10000 non-null  float64
 6   fee                      0 non-null      float64
 7   has_photo                10000 non-null  object 
 8   pets_allowed             5837 non-null   object 
 9   price_display            10000 non-null  object 
 10  price_type               10000 non-null  object 
 11  square_feet              10000 non-null  int64  
 12  address                  10000 non-null  object 
 13  cityname                 9923 non-null   object 
 14  state                  

In [46]:
user_viewings['call_to_action'].value_counts()

call_to_action
reported         1702
contact_agent    1685
shortlisted      1612
Name: count, dtype: int64

In [8]:
apartments.isnull().sum()

id                             0
category                       0
body                           0
amenities                      0
bathrooms                      0
bedrooms                       0
fee                        10000
has_photo                      0
pets_allowed                4163
price_display                  0
price_type                     0
square_feet                    0
address                        0
cityname                      77
state                         77
latitude                      10
longitude                     10
title                          0
source                         0
price                          0
currency                       0
listing_created_on             0
is_active                      0
last_modified_timestamp        0
dtype: int64

In [10]:
# Average Listings Price (Daily)
apartments['listing_created_on'] = pd.to_datetime(apartments['listing_created_on'])
average_listing_price = apartments.groupby(apartments['listing_created_on'].dt.date)['price'].mean().reset_index(name='average_price')  
average_listing_price

Unnamed: 0,listing_created_on,average_price
0,2024-05-16,1520.200595
1,2024-05-17,1473.570505
2,2024-05-18,1464.841981


In [28]:
# Daily viewings count

# Convert viewed_at to datetime and then calculate daily viewings count
user_viewings['viewed_at'] = pd.to_datetime(user_viewings['viewed_at'])
# Daily viewings count  
daily_bookings = user_viewings.groupby(user_viewings['viewed_at'].dt.date).size().reset_index(name='total_bookings')  

# Daily active listings count  
active_listings = apartments[apartments['is_active'] == 1]  
daily_active_listings = active_listings.groupby(active_listings['listing_created_on'].dt.date).size().reset_index(name='active_listings')  

# Join and compute occupancy rate  
occupancy_rate = daily_bookings.merge(daily_active_listings, left_on='viewed_at', right_on='listing_created_on', how='left')  
occupancy_rate['occupancy_rate'] = (occupancy_rate['total_bookings'] / occupancy_rate['active_listings']) * 100  
occupancy_rate = occupancy_rate.dropna(subset=['occupancy_rate'])
occupancy_rate


Unnamed: 0,viewed_at,total_bookings,listing_created_on,active_listings,occupancy_rate
0,2024-05-16,75,2024-05-16,2789.0,2.689136
1,2024-05-17,250,2024-05-17,2672.0,9.356287
2,2024-05-18,370,2024-05-18,2843.0,13.014421


In [36]:
popular_locations = user_viewings.merge(apartments[['id', 'cityname']], left_on='apartment_id', right_on='id', how='left')
# popular_locations = popular_locations.dropna(subset=['cityname'])
popular_locations['date'] = popular_locations['viewed_at'].dt.date
popular_locations_count = popular_locations.groupby(['date', 'cityname']).size().reset_index(name='bookings')
most_popular_locations = popular_locations_count.loc[popular_locations_count.groupby('date')['bookings'].idxmax()].reset_index(drop=True)
most_popular_locations

Unnamed: 0,date,cityname,bookings
0,2024-05-16,Austin,8
1,2024-05-17,Austin,22
2,2024-05-18,Austin,17
3,2024-05-19,Austin,22
4,2024-05-20,Austin,20
5,2024-05-21,Austin,25
6,2024-05-22,Austin,33
7,2024-05-23,Austin,19
8,2024-05-24,Austin,13
9,2024-05-25,Austin,20


In [37]:
listing_revenue = user_viewings.merge(apartments, left_on='apartment_id', right_on='id')  
listing_revenue['date'] = listing_revenue['viewed_at'].dt.date  
listing_revenue['revenue'] = listing_revenue['price']  
daily_top_performers = listing_revenue.groupby(['date', 'apartment_id']).agg({'revenue': 'sum'}).reset_index()  
daily_top_performers = daily_top_performers.loc[daily_top_performers.groupby('date')['revenue'].idxmax()].reset_index(drop=True)  
daily_top_performers

Unnamed: 0,date,apartment_id,revenue
0,2024-05-16,5509110411,3565
1,2024-05-17,5668626670,8800
2,2024-05-18,5666447277,52500
3,2024-05-19,5668626496,6270
4,2024-05-20,5668643292,11000
5,2024-05-21,5668643292,11000
6,2024-05-22,5508879959,10600
7,2024-05-23,5668616326,8345
8,2024-05-24,5668643292,11000
9,2024-05-25,5668628928,9000


### User Engagement Metrics

In [44]:
total_bookings_per_user = user_viewings.groupby([user_viewings['viewed_at'].dt.date, 'user_id']).size().reset_index(name='total_bookings')  
total_bookings_per_user

Unnamed: 0,viewed_at,user_id,total_bookings
0,2024-05-16,59,1
1,2024-05-16,98,1
2,2024-05-16,136,1
3,2024-05-16,168,1
4,2024-05-16,225,1
...,...,...,...
4788,2024-05-29,4817,1
4789,2024-05-29,4904,1
4790,2024-05-29,4959,1
4791,2024-05-29,4962,1


In [45]:
bookings_per_user_day = user_viewings.groupby([user_viewings['viewed_at'].dt.date, 'user_id']).size().reset_index(name='bookings')  
repeat_customers = bookings_per_user_day[bookings_per_user_day['bookings'] > 1].groupby('viewed_at').size().reset_index(name='repeat_users')  
total_users_per_day = bookings_per_user_day.groupby('viewed_at').size().reset_index(name='total_users')  
repeat_customer_rate = repeat_customers.merge(total_users_per_day, on='viewed_at', how='right').fillna(0)  
repeat_customer_rate['repeat_customer_rate'] = (repeat_customer_rate['repeat_users'] / repeat_customer_rate['total_users']) * 100  
repeat_customer_rate

Unnamed: 0,viewed_at,repeat_users,total_users,repeat_customer_rate
0,2024-05-16,1,74,1.351351
1,2024-05-17,8,241,3.319502
2,2024-05-18,18,352,5.113636
3,2024-05-19,23,441,5.21542
4,2024-05-20,20,442,4.524887
5,2024-05-21,18,409,4.400978
6,2024-05-22,20,455,4.395604
7,2024-05-23,14,395,3.544304
8,2024-05-24,13,408,3.186275
9,2024-05-25,21,456,4.605263
