# Nairobi Airbnb Listing Data Analysis
- Data for the last 12 months (up to August 30, 2025)

In [1]:
# import libraries and load data 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('ggplot')
import seaborn as sns

# Locations of the two raw CSV files in the project's GitHub repository
LISTINGS_URL = r'https://github.com/Morris-mk/Nairobi_AirbnbMarket_Overview_and-_InvestmentAnalysis/raw/refs/heads/main/data/listings_raw_data.csv'
CALENDAR_URL = r'https://github.com/Morris-mk/Nairobi_AirbnbMarket_Overview_and-_InvestmentAnalysis/raw/refs/heads/main/data/past_calendar_rates_raw_data.csv'

# df_listings: information about the properties listed
df_listings = pd.read_csv(LISTINGS_URL)

# df_calendar: information on occupancy of the Airbnbs
df_calendar = pd.read_csv(CALENDAR_URL)

In [2]:
# Print the (rows, columns) shape of df_listings, then preview its first five rows.
print(df_listings.shape)
df_listings.head()

(300, 61)


Unnamed: 0,listing_id,listing_name,listing_type,room_type,cover_photo_url,photos_count,host_id,host_name,cohost_ids,cohost_names,...,l90d_occupancy,l90d_adjusted_occupancy,l90d_revpar,l90d_revpar_native,l90d_adjusted_revpar,l90d_adjusted_revpar_native,l90d_reserved_days,l90d_blocked_days,l90d_available_days,l90d_total_days
0,471581,Located In a Serene Environment,Entire cottage,entire_home,https://a0.muscache.com/im/pictures/6434524/bc...,37,2280941,Bella,,,...,0.5,0.726,20.1,2594.2,29.1,3765.7,45,28,45,90
1,906958,Makena's Place Karen - Flamingo Room,Private room in cottage,private_room,https://a0.muscache.com/im/pictures/68ecc57f-d...,29,4856316,Chichi,,,...,0.111,0.147,5.8,753.7,7.7,997.5,10,22,80,90
2,1023556,Home in Langata 1 km to Nairobi National Park,Entire guesthouse,entire_home,https://a0.muscache.com/im/pictures/01cc7287-2...,20,5634522,Janet,,,...,0.044,0.0,0.9,112.0,0.0,0.0,4,0,86,90
3,1237886,Hob House,Room in bed and breakfast,hotel_room,https://a0.muscache.com/im/pictures/cbdab7e1-f...,8,6748840,,,,...,0.056,0.0,6.9,890.1,0.0,0.0,5,0,85,90
4,1803821,Makena's Place Karen - All Rooms,Private room in cottage,private_room,https://a0.muscache.com/im/pictures/786899c5-b...,58,4856316,Chichi,,,...,0.044,0.055,5.6,727.9,6.9,897.4,4,17,86,90


In [2]:
# Column-by-column overview of df_listings: non-null counts and dtypes.
df_listings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300 entries, 0 to 299
Data columns (total 61 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   listing_id                   300 non-null    int64  
 1   listing_name                 300 non-null    object 
 2   listing_type                 300 non-null    object 
 3   room_type                    300 non-null    object 
 4   cover_photo_url              300 non-null    object 
 5   photos_count                 300 non-null    int64  
 6   host_id                      300 non-null    int64  
 7   host_name                    296 non-null    object 
 8   cohost_ids                   75 non-null     object 
 9   cohost_names                 75 non-null     object 
 10  superhost                    300 non-null    bool   
 11  latitude                     300 non-null    float64
 12  longitude                    300 non-null    float64
 13  guests              

In [4]:
# Print the (rows, columns) shape of df_calendar, then preview its first five rows.
print(df_calendar.shape)
df_calendar.head()

(3542, 14)


Unnamed: 0,listing_id,date,vacant_days,reserved_days,occupancy,revenue,rate_avg,booked_rate_avg,booking_lead_time_avg,length_of_stay_avg,min_nights_avg,native_booked_rate_avg,native_rate_avg,native_revenue
0,21520917,2024-12-01,22,9,0.29,345.0,37.6,38.3,5.0,9.0,,4960.0,4869.0,44680.0
1,39875956,2025-08-01,31,0,0.0,0.0,69.1,,,,1.0,,8928.0,0.0
2,36253079,2025-04-01,30,0,0.0,0.0,33.7,,,,1.0,,4364.0,0.0
3,40675899,2024-09-01,24,6,0.2,243.0,40.2,40.5,1.0,9.0,,5232.0,5193.0,31391.0
4,21520917,2025-01-01,31,0,0.0,0.0,37.2,,,,1.0,,4805.0,0.0


In [5]:
# Count the missing values in each column of df_calendar.
df_calendar.isnull().sum()

listing_id                   0
date                         0
vacant_days                  0
reserved_days                0
occupancy                    0
revenue                      0
rate_avg                     0
booked_rate_avg           1667
booking_lead_time_avg     1874
length_of_stay_avg        1874
min_nights_avg            1162
native_booked_rate_avg    1667
native_rate_avg              0
native_revenue               0
dtype: int64

### Data Cleaning & wrangling 

- remove duplicates
- deal with null values
- harmonize data type inconsistencies
- remove unnecessary features

In [None]:
# remove duplicates
# drop_duplicates() returns a new DataFrame by default, so the result has to be
# assigned back; calling it on its own leaves the original frame untouched.
df_listings = df_listings.drop_duplicates()
df_calendar = df_calendar.drop_duplicates()
print('duplicates removed')

# keep only the df_listings columns that have at least 50% non-null values
# (dropna's `thresh` is the minimum number of non-null entries a column needs to survive)
thresh = int(0.5 * len(df_listings))
df_listings = df_listings.dropna(thresh=thresh, axis=1)
print('=> features with extreme Nan_values removed')

duplicates removed


In [None]:
# remove duplicates
# drop_duplicates() returns a new DataFrame by default, so the result has to be assigned back.
df_calendar = df_calendar.drop_duplicates()
print('duplicates removed')

# keep only the df_calendar columns that have at least 60% non-null values
# (dropna's `thresh` is the minimum number of non-null entries a column needs to survive)
thresh = int(0.6 * len(df_calendar))
df_calendar = df_calendar.dropna(thresh=thresh, axis=1)
print(df_calendar.shape)

# convert date column to datetime format
# NOTE: errors='coerce' turns any unparseable value into NaT instead of raising.
df_calendar['date'] = pd.to_datetime(df_calendar['date'], errors='coerce')
df_calendar['date'].dtype

(3542, 10)


In [None]:
# feature selection and manipulation in df_listings

# average exchange rate, taken as the mean ratio of the native-currency
# average daily rate to the USD average daily rate across listings
rate = df_listings['ttm_avg_rate_native'].div(df_listings['ttm_avg_rate']).mean()

# convert cleaning_fee and extra_guest_fee from USD to KSH using the average exchange rate
for fee_col in ('cleaning_fee', 'extra_guest_fee'):
    df_listings.loc[:, fee_col] = (df_listings[fee_col] * rate).round(2)

# drop some irrelevant columns and columns in USD currency
cols_to_drop = [
    'cover_photo_url', 'rating_overall', 'ttm_revenue', 'ttm_avg_rate',
    'ttm_revpar', 'ttm_adjusted_revpar', 'l90d_revenue', 'l90d_avg_rate',
    'l90d_revpar', 'l90d_adjusted_revpar', 'currency',
]
df_listings.drop(columns=cols_to_drop, inplace=True)

# preview the frame after the conversion and column removal
df_listings.head()

Unnamed: 0,listing_id,listing_name,listing_type,room_type,photos_count,host_id,host_name,superhost,latitude,longitude,...,l90d_revenue_native,l90d_avg_rate_native,l90d_occupancy,l90d_adjusted_occupancy,l90d_revpar_native,l90d_adjusted_revpar_native,l90d_reserved_days,l90d_blocked_days,l90d_available_days,l90d_total_days
0,471581,Located In a Serene Environment,Entire cottage,entire_home,37,2280941,Bella,True,-1.2268,36.8577,...,233475.0,5202.7,0.5,0.726,2594.2,3765.7,45,28,45,90
1,906958,Makena's Place Karen - Flamingo Room,Private room in cottage,private_room,29,4856316,Chichi,True,-1.324,36.7053,...,67833.0,6888.1,0.111,0.147,753.7,997.5,10,22,80,90
2,1023556,Home in Langata 1 km to Nairobi National Park,Entire guesthouse,entire_home,20,5634522,Janet,False,-1.3222,36.7852,...,10078.0,2482.1,0.044,0.0,112.0,0.0,4,0,86,90
3,1237886,Hob House,Room in bed and breakfast,hotel_room,8,6748840,,False,-1.2258,36.7679,...,80106.0,15639.3,0.056,0.0,890.1,0.0,5,0,85,90
4,1803821,Makena's Place Karen - All Rooms,Private room in cottage,private_room,58,4856316,Chichi,True,-1.3237,36.7059,...,65507.0,16196.3,0.044,0.055,727.9,897.4,4,17,86,90


In [20]:
# rename some cols for easier referencing (old name -> new name)
renamed_cols = {
    'num_reviews': 'total_reviews',
    'ttm_revenue_native': 'total_ttm_revenue',
    'ttm_avg_rate_native': 'daily_avg_ttm_rate',
    'ttm_revpar_native': 'ttm_revpar',
    'ttm_adjusted_revpar_native': 'ttm_adjusted_revpar',
    'l90d_revenue_native': 'l90d_revenue',
    'l90d_avg_rate_native': 'l90d_avg_rate',
    'l90d_revpar_native': 'l90d_revpar',
    'l90d_adjusted_revpar_native': 'l90d_adjusted_revpar',
}
df_listings.rename(columns=renamed_cols, inplace=True)
df_listings.head()

Unnamed: 0,listing_id,listing_name,listing_type,room_type,photos_count,host_id,host_name,superhost,latitude,longitude,...,ttm_available_days,l90d_revenue,l90d_avg_rate,l90d_occupancy,l90d_adjusted_occupancy,l90d_revpar,l90d_adjusted_revpar,l90d_reserved_days,l90d_blocked_days,l90d_available_days
0,471581,Located In a Serene Environment,Entire cottage,entire_home,37,2280941,Bella,True,-1.2268,36.8577,...,257,233475.0,5202.7,0.5,0.726,2594.2,3765.7,45,28,45
1,906958,Makena's Place Karen - Flamingo Room,Private room in cottage,private_room,29,4856316,Chichi,True,-1.324,36.7053,...,284,67833.0,6888.1,0.111,0.147,753.7,997.5,10,22,80
2,1023556,Home in Langata 1 km to Nairobi National Park,Entire guesthouse,entire_home,20,5634522,Janet,False,-1.3222,36.7852,...,354,10078.0,2482.1,0.044,0.0,112.0,0.0,4,0,86
3,1237886,Hob House,Room in bed and breakfast,hotel_room,8,6748840,,False,-1.2258,36.7679,...,350,80106.0,15639.3,0.056,0.0,890.1,0.0,5,0,85
4,1803821,Makena's Place Karen - All Rooms,Private room in cottage,private_room,58,4856316,Chichi,True,-1.3237,36.7059,...,326,65507.0,16196.3,0.044,0.055,727.9,897.4,4,17,86


In [None]:
# feature manipulation and wrangling in df_calendar
# Drop the original 'revenue' / 'rate_avg' columns and let the native-currency
# columns take over those names.
# The guard makes the cell safe to re-run: once the rename has happened, 'revenue'
# and 'rate_avg' ARE the native-currency columns, and an unguarded drop would
# silently delete them.
if {'native_rate_avg', 'native_revenue'}.issubset(df_calendar.columns):
    df_calendar = (
        df_calendar
        .drop(columns=['revenue', 'rate_avg'])
        .rename(columns={'occupancy': 'occupancy_rate',
                         'native_rate_avg': 'rate_avg',
                         'native_revenue': 'revenue'})
    )
print(df_calendar.shape)
df_calendar.head(2)

(3542, 8)


Unnamed: 0,listing_id,date,vacant_days,reserved_days,occupancy,min_nights_avg,native_rate_avg,native_revenue
0,21520917,2024-12-01,22,9,0.29,,4869.0,44680.0
1,39875956,2025-08-01,31,0,0.0,1.0,8928.0,0.0


### Exploratory data analysis

In [None]:
# Number of distinct values in each column of df_listings.
df_listings.nunique()

listing_id                 300
listing_name               299
listing_type                29
room_type                    3
photos_count                64
host_id                    204
host_name                  167
superhost                    2
latitude                   247
longitude                  247
guests                      11
bedrooms                     9
beds                        12
baths                       12
registration                 2
amenities                  297
instant_book                 2
min_nights                   9
cancellation_policy          4
cleaning_fee                29
extra_guest_fee             20
total_reviews              124
rating_accuracy             12
rating_checkin               8
rating_cleanliness          11
rating_communication         9
rating_location             11
rating_value                11
total_ttm_revenue          300
daily_avg_ttm_rate         300
ttm_occupancy              141
ttm_adjusted_occupancy     157
ttm_revp

In [None]:
# summary stats for numerical features
# errors='ignore' keeps this cell re-runnable: on a second run the two columns are
# already gone and a plain drop would raise KeyError.
df_listings.drop(columns=['ttm_total_days', 'l90d_total_days'], inplace=True, errors='ignore')

# numeric columns that are identifiers / coordinates and are left out of the summary
no = ['listing_id', 'host_id', 'latitude', 'longitude']
num_cols = [col for col in df_listings.select_dtypes(exclude=['object', 'bool']).columns if col not in no]
df_listings[num_cols].describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
photos_count,300.0,30.266667,16.281046,0.0,19.0,28.0,37.0,122.0
guests,258.0,3.585271,2.265134,1.0,2.0,3.0,4.0,16.0
bedrooms,249.0,2.068273,1.583482,1.0,1.0,2.0,3.0,15.0
beds,297.0,2.195286,2.044004,1.0,1.0,2.0,3.0,19.0
baths,300.0,1.456667,1.169479,0.0,1.0,1.0,2.0,8.5
min_nights,298.0,2.197987,3.454036,1.0,1.0,2.0,2.0,30.0
cleaning_fee,298.0,955.090235,1880.496997,0.0,0.0,0.0,1291.95,16278.58
extra_guest_fee,279.0,488.99681,1465.513393,0.0,0.0,0.0,0.0,12919.51
num_reviews,300.0,59.513333,73.959289,1.0,19.0,39.0,77.0,686.0
rating_accuracy,290.0,4.801724,0.183668,3.9,4.7,4.8,4.9,5.0


**Insights from the summary statistics**

A typical listing:

- Has an average of 2 bedrooms and 1 bathroom, and requires a minimum booking of 2 nights.

- Costs on average 7,268 Ksh to book per day.

- Has an average occupancy rate of about 15.66%.

- Generates on average 535,014.27 Ksh in revenue over 12 months; the 90-day (3-month) revenue is 120,408.26 Ksh.

- Has an average adjusted revenue per available room (adjusted RevPAR) for the past 12 months of 1,064 Ksh, approximately 1,070 Ksh.

- Costs on average 960 Ksh to clean, which is approximately 1,000 Ksh.

- Including an extra guest costs on average 488 Ksh (~500 Ksh).


In [14]:
# Number of distinct values in each column of df_listings (same call as in the earlier EDA cell).
df_listings.nunique()

listing_id                     300
listing_name                   299
listing_type                    29
room_type                        3
photos_count                    64
host_id                        204
host_name                      167
superhost                        2
latitude                       247
longitude                      247
guests                          11
bedrooms                         9
beds                            12
baths                           12
registration                     2
amenities                      297
instant_book                     2
min_nights                       9
cancellation_policy              4
cleaning_fee                    29
extra_guest_fee                 20
num_reviews                    124
rating_accuracy                 12
rating_checkin                   8
rating_cleanliness              11
rating_communication             9
rating_location                 11
rating_value                    11
ttm_revenue_native  

In [None]:
# Candidate columns for count plots; this bare expression only displays the tuple.
'guests','bedrooms','beds','baths','min_nights','cleaning_fee'

# TODO: plot countplots for these columns (not implemented in this cell)

['photos_count',
 'guests',
 'bedrooms',
 'beds',
 'baths',
 'min_nights',
 'cleaning_fee',
 'extra_guest_fee',
 'num_reviews',
 'rating_accuracy',
 'rating_checkin',
 'rating_cleanliness',
 'rating_communication',
 'rating_location',
 'rating_value',
 'ttm_revenue_native',
 'ttm_avg_rate_native',
 'ttm_occupancy',
 'ttm_adjusted_occupancy',
 'ttm_revpar_native',
 'ttm_adjusted_revpar_native',
 'ttm_reserved_days',
 'ttm_blocked_days',
 'ttm_available_days',
 'ttm_total_days',
 'l90d_revenue_native',
 'l90d_avg_rate_native',
 'l90d_occupancy',
 'l90d_adjusted_occupancy',
 'l90d_revpar_native',
 'l90d_adjusted_revpar_native',
 'l90d_reserved_days',
 'l90d_blocked_days',
 'l90d_available_days',
 'l90d_total_days']

In [16]:
# Preview the first five rows of df_listings at this point in the notebook.
df_listings.head()

Unnamed: 0,listing_id,listing_name,listing_type,room_type,photos_count,host_id,host_name,superhost,latitude,longitude,...,ttm_available_days,l90d_revenue_native,l90d_avg_rate_native,l90d_occupancy,l90d_adjusted_occupancy,l90d_revpar_native,l90d_adjusted_revpar_native,l90d_reserved_days,l90d_blocked_days,l90d_available_days
0,471581,Located In a Serene Environment,Entire cottage,entire_home,37,2280941,Bella,True,-1.2268,36.8577,...,257,233475.0,5202.7,0.5,0.726,2594.2,3765.7,45,28,45
1,906958,Makena's Place Karen - Flamingo Room,Private room in cottage,private_room,29,4856316,Chichi,True,-1.324,36.7053,...,284,67833.0,6888.1,0.111,0.147,753.7,997.5,10,22,80
2,1023556,Home in Langata 1 km to Nairobi National Park,Entire guesthouse,entire_home,20,5634522,Janet,False,-1.3222,36.7852,...,354,10078.0,2482.1,0.044,0.0,112.0,0.0,4,0,86
3,1237886,Hob House,Room in bed and breakfast,hotel_room,8,6748840,,False,-1.2258,36.7679,...,350,80106.0,15639.3,0.056,0.0,890.1,0.0,5,0,85
4,1803821,Makena's Place Karen - All Rooms,Private room in cottage,private_room,58,4856316,Chichi,True,-1.3237,36.7059,...,326,65507.0,16196.3,0.044,0.055,727.9,897.4,4,17,86


In [None]:
# Plot distributions of the most significant features

# Column names as they exist after the earlier rename cell
# (e.g. 'num_reviews' -> 'total_reviews', 'ttm_revenue_native' -> 'total_ttm_revenue').
cols = ['cleaning_fee', 'extra_guest_fee', 'total_reviews', 'ttm_adjusted_occupancy', 'ttm_revpar',
        'ttm_adjusted_revpar', 'ttm_available_days', 'total_ttm_revenue', 'daily_avg_ttm_rate', 'l90d_revenue',
        'ttm_reserved_days', 'l90d_avg_rate']


def create_distributions(data, cols, ncols=3, figsize=(10, 8)):
    """Draw one boxplot per column of ``cols`` on a grid of subplots.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the columns to plot.
    cols : sequence of str
        Names of the columns to plot; each gets its own subplot.
    ncols : int, default 3
        Number of subplot columns in the grid. Rows are added as needed.
    figsize : tuple of float, default (10, 8)
        Figure size passed to ``plt.subplots``.

    Raises
    ------
    ValueError
        If ``cols`` is empty or ``ncols`` is smaller than 1.
    KeyError
        If any requested column is not present in ``data``.
    """
    cols = list(cols)
    if not cols:
        raise ValueError('cols must contain at least one column name')
    if ncols < 1:
        raise ValueError('ncols must be at least 1')

    # Fail early with a readable message instead of an error from inside seaborn.
    missing = [col for col in cols if col not in data.columns]
    if missing:
        raise KeyError(f'columns not found in data: {missing}')

    # create the figure canvas: enough rows to fit every column (ceiling division).
    # squeeze=False guarantees a 2-D array of Axes, so flatten() always works.
    nrows = -(-len(cols) // ncols)
    fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize, squeeze=False)
    axs = axs.flatten()

    # plot the boxplots
    for ax, col in zip(axs, cols):
        sns.boxplot(data=data, x=col, ax=ax)

    # hide the grid cells that have no column to show
    for ax in axs[len(cols):]:
        ax.set_visible(False)

    # tidy the spacing once everything has been drawn
    fig.tight_layout()


create_distributions(df_listings, cols)
plt.show()





In [None]:
# Summary statistics for df_calendar
# iloc[:, 1:] skips the first column so it is not summarised; .T puts one feature per row.
df_calendar.iloc[:, 1:].describe().T

Unnamed: 0,count,mean,min,25%,50%,75%,max,std
date,3542.0,2025-02-13 11:23:00.237154048,2024-09-01 00:00:00,2024-11-01 00:00:00,2025-02-01 00:00:00,2025-05-01 00:00:00,2025-08-01 00:00:00,
vacant_days,3542.0,24.111801,0.0,20.0,28.0,31.0,31.0,8.779706
reserved_days,3542.0,6.302654,0.0,0.0,1.0,10.0,31.0,8.745056
occupancy,3542.0,0.207335,0.0,0.0,0.033,0.333,1.0,0.287834
min_nights_avg,2380.0,2.352941,1.0,1.0,1.0,2.0,31.0,4.175104
native_rate_avg,3542.0,7274.477132,1124.0,4056.25,6045.5,8578.5,102246.0,6844.2532
native_revenue,3542.0,45314.456804,0.0,0.0,7057.5,59069.0,2348193.0,98140.055884


# summary from the summary statistics on listings
