In [2]:
import pandas as pd
import ast

# Replace 'your_file.csv' with the path to your actual CSV file
file_path = 'booking_details.csv'

# Read the CSV data into a DataFrame
df = pd.read_csv(file_path)

# Parse the availability field
df['availability'] = df['availability'].apply(ast.literal_eval)

# Create new columns from the availability dictionary
availability_df = df['availability'].apply(pd.Series)

# Combine the original DataFrame with the new availability DataFrame
booking_df = pd.concat([df.drop(columns=['availability']), availability_df], axis=1)

booking_df['minimum_nights'] = booking_df['minimum_nights'].astype(int)
booking_df['maximum_nights'] = booking_df['maximum_nights'].astype(int)
booking_df['availability_30'] = booking_df['availability_30'].astype(int)
booking_df['availability_60'] = booking_df['availability_60'].astype(int)
booking_df['availability_90'] = booking_df['availability_90'].astype(int)
booking_df['availability_365'] = booking_df['availability_365'].astype(int)


# Rename the DataFrame to booking_dataframe
booking_dataframe = booking_df.copy()

# Check the resulting DataFrame
booking_dataframe.head()


Unnamed: 0,_id,minimum_nights,maximum_nights,availability_30,availability_60,availability_90,availability_365
0,10021707,14,1125,0,0,0,0
1,10009999,2,1125,0,0,0,0
2,10066928,1,1125,0,0,0,0
3,10069642,15,20,0,0,0,0
4,10047964,2,1125,27,57,87,362


In [3]:
# Load the DataFrame from CSV
df_host = pd.read_csv("host_information.csv")

# Convert the string representation of dictionaries to dictionaries
df_host['host'] = df_host['host'].apply(ast.literal_eval)

# Create new columns from the host dictionary
host_df = df_host['host'].apply(pd.Series)

# Combine the original DataFrame with the new host DataFrame
df_host = pd.concat([df_host.drop(columns=['host']), host_df], axis=1)

# Clean up missing values
df_host['host_response_time'] = df_host['host_response_time'].fillna('no response time provided')
df_host['host_response_rate'] = df_host['host_response_rate'].fillna(0)
df_host['host_response_rate'] = df_host['host_response_rate'].astype(int)
df_host['host_response_rate'] = df_host['host_response_rate'].astype(str) + '%'

# Replace '0%' in 'host_response_rate' with 'not given'
df_host['host_response_rate'] = df_host['host_response_rate'].replace('0%', 'not given')

# Replace empty strings with "Not provided" in host_neighbourhood
df_host['host_neighbourhood'] = df_host['host_neighbourhood'].replace("", "Not provided")

# Convert boolean columns to bool type
df_host['host_is_superhost'] = df_host['host_is_superhost'].astype(bool)
df_host['host_has_profile_pic'] = df_host['host_has_profile_pic'].astype(bool)
df_host['host_identity_verified'] = df_host['host_identity_verified'].astype(bool)

# Convert host_verifications column to a readable format
df_host['host_verifications'] = df_host['host_verifications'].apply(lambda x: ', '.join(x))

# Improve readability of URLs (assuming URLs are stored as strings)
df_host['host_url'] = df_host['host_url'].apply(lambda x: x.split('?')[0])
df_host['host_thumbnail_url'] = df_host['host_thumbnail_url'].apply(lambda x: x.split('?')[0])
df_host['host_picture_url'] = df_host['host_picture_url'].apply(lambda x: x.split('?')[0])

# Rename columns if needed (ensure consistency with subsequent operations)
df_host.rename(columns={'_id': '_id', 'host_id': 'host_id', 'host_url': 'host_profile_url',
                        'host_name': 'host_name', 'host_location': 'host_location', 'host_about': 'host_about',
                        'host_response_time': 'host_response_time', 'host_thumbnail_url': 'host_thumbnail_url',
                        'host_picture_url': 'host_picture_url', 'host_neighbourhood': 'host_neighbourhood',
                        'host_response_rate': 'host_response_rate', 'host_is_superhost': 'host_is_superhost',
                        'host_has_profile_pic': 'host_has_profile_pic', 'host_identity_verified': 'host_identity_verified',
                        'host_listings_count': 'host_listings_count', 'host_total_listings_count': 'host_total_listings_count',
                        'host_verifications': 'host_verifications'}, inplace=True)

# Assuming you wanted to assign df_host to hosting_dataframe for further use
hosting_dataframe = df_host.copy()

# Check the resulting DataFrame
hosting_dataframe.head()


Unnamed: 0,_id,host_id,host_profile_url,host_name,host_location,host_about,host_thumbnail_url,host_picture_url,host_neighbourhood,host_is_superhost,host_has_profile_pic,host_identity_verified,host_listings_count,host_total_listings_count,host_verifications,host_response_time,host_response_rate
0,10021707,11275734,https://www.airbnb.com/users/show/11275734,Josh,"New York, New York, United States",,https://a0.muscache.com/im/users/11275734/prof...,https://a0.muscache.com/im/users/11275734/prof...,Bushwick,False,True,True,1,1,"email, phone, reviews, kba",no response time provided,not given
1,10009999,1282196,https://www.airbnb.com/users/show/1282196,Ynaie,"Rio de Janeiro, State of Rio de Janeiro, Brazil",I am an artist and traveling is a major part o...,https://a0.muscache.com/im/pictures/9681e3cc-4...,https://a0.muscache.com/im/pictures/9681e3cc-4...,Jardim Botânico,False,True,False,1,1,"email, phone, facebook",no response time provided,not given
2,10066928,9036477,https://www.airbnb.com/users/show/9036477,Margaux,"Montreal, Quebec, Canada",,https://a0.muscache.com/im/users/9036477/profi...,https://a0.muscache.com/im/users/9036477/profi...,Le Plateau,False,True,False,2,2,"email, phone, reviews, work_email",no response time provided,not given
3,10069642,51670240,https://www.airbnb.com/users/show/51670240,Jonathan,"Resende, Rio de Janeiro, Brazil",,https://a0.muscache.com/im/pictures/9a6839d9-9...,https://a0.muscache.com/im/pictures/9a6839d9-9...,Not provided,False,True,False,1,1,"email, phone, jumio, government_id",no response time provided,not given
4,10047964,1241644,https://www.airbnb.com/users/show/1241644,Zeynep,"Istanbul, Istanbul, Turkey",Z.,https://a0.muscache.com/im/users/1241644/profi...,https://a0.muscache.com/im/users/1241644/profi...,Moda,False,True,True,2,2,"email, phone, facebook, reviews, jumio, govern...",no response time provided,not given


In [4]:
df_image = pd.read_csv("images.csv")

# Function to extract the URL from the dictionary string
def extract_url(images_str):
    images_dict = ast.literal_eval(images_str)
    return images_dict['picture_url']

# Apply the function to the 'images' column
df_image['picture_url'] = df_image['images'].apply(extract_url)

df_image = df_image.drop(columns=['images'])
image_dataframe=pd.DataFrame(df_image)

image_dataframe.head()

Unnamed: 0,_id,picture_url
0,10021707,https://a0.muscache.com/im/pictures/72844c8c-f...
1,10009999,https://a0.muscache.com/im/pictures/5b408b9e-4...
2,10066928,https://a0.muscache.com/im/pictures/f208bdd7-b...
3,10069642,https://a0.muscache.com/im/pictures/5b1f4beb-6...
4,10047964,https://a0.muscache.com/im/pictures/231120b6-e...


In [5]:
price_df = pd.read_csv("pricing_details.csv")

# Replace NaN values with 0
price_df.fillna(0, inplace=True)

# Format the prices to two decimal places
price_df['price'] = price_df['price'].apply(lambda x: '{:.2f}'.format(x))
price_df['security_deposit'] = price_df['security_deposit'].apply(lambda x: '{:.2f}'.format(x))
price_df['cleaning_fee'] = price_df['cleaning_fee'].apply(lambda x: '{:.2f}'.format(x))
price_df['weekly_price'] = price_df['weekly_price'].apply(lambda x: '{:.2f}'.format(x))
price_df['monthly_price'] = price_df['monthly_price'].apply(lambda x: '{:.2f}'.format(x))

pricing_dataframe=pd.DataFrame(price_df)

pricing_dataframe.head()

Unnamed: 0,_id,price,weekly_price,monthly_price,cleaning_fee,security_deposit
0,10021707,40.0,0.0,0.0,0.0,0.0
1,10009999,317.0,1492.0,4849.0,187.0,0.0
2,10066928,140.0,0.0,0.0,0.0,0.0
3,10069642,858.0,0.0,0.0,112.0,4476.0
4,10047964,527.0,0.0,0.0,211.0,0.0


In [6]:
# Read the CSV file into a DataFrame
df = pd.read_csv('listing_details.csv')

# Convert the 'address' and 'amenities' columns from string to their respective data types
df['address'] = df['address'].apply(ast.literal_eval)
df['amenities'] = df['amenities'].apply(ast.literal_eval)

# Normalize the 'address' column to extract specific address details
address_df = pd.json_normalize(df['address'])

# Normalize the 'review_scores' column to extract individual review score components
review_scores_df = pd.json_normalize(df['review_scores'].apply(ast.literal_eval))

# Concatenate the normalized DataFrames with the original DataFrame
listing_df = pd.concat([df.drop(columns=['address', 'review_scores']), address_df, review_scores_df], axis=1)

# Optionally, handle 'location.coordinates' if 'location' column is missing
if 'location' in df.columns:
    df['location'] = df['location'].apply(ast.literal_eval)
    location_df = pd.json_normalize(df['location'])
    listing_df = pd.concat([listing_df, location_df[['location.coordinates']]], axis=1)

# Extract latitude and longitude from 'location.coordinates'
listing_df['latitude'] = listing_df['location.coordinates'].apply(lambda x: x[1] if isinstance(x, list) else None)
listing_df['longitude'] = listing_df['location.coordinates'].apply(lambda x: x[0] if isinstance(x, list) else None)

# Drop 'location.coordinates' column
listing_df.drop(columns=['location.coordinates'], inplace=True)

# Clean up 'amenities' column if needed
listing_df['amenities'] = listing_df['amenities'].apply(lambda x: ', '.join(x))

# Fill missing values for any columns, if necessary
listing_df.fillna(value={
    'bedrooms': 0, 'bathrooms': 0, 'beds': 0, 'first_review': '1970-01-01 00:00:00',
    'last_review': '1970-01-01 00:00:00', 'review_scores_accuracy': 0.0, 'review_scores_cleanliness': 0.0,
    'review_scores_checkin': 0.0, 'review_scores_communication': 0.0, 'review_scores_location': 0.0,
    'review_scores_value': 0.0, 'review_scores_rating': 0.0, 'latitude': 0.0, 'longitude': 0.0
}, inplace=True)

# Convert appropriate columns to integers where applicable
columns_to_convert = ['bedrooms', 'bathrooms', 'number_of_reviews', 'accommodates', 'beds']
for column in columns_to_convert:
    listing_df[column] = listing_df[column].astype(int)

# Ensure dates are in datetime format if needed
listing_df['last_scraped'] = pd.to_datetime(listing_df['last_scraped'])
listing_df['calendar_last_scraped'] = pd.to_datetime(listing_df['calendar_last_scraped'])
listing_df['first_review'] = pd.to_datetime(listing_df['first_review'])
listing_df['last_review'] = pd.to_datetime(listing_df['last_review'])

# Optionally add additional columns if not already extracted
additional_columns = {
    'street': 'Rio de Janeiro, Rio de Janeiro, Brazil',
    'suburb': 'Copacabana',
    'government_area': 'Copacabana',
    'market': 'Rio De Janeiro',
    'country': 'Brazil',
    'country_code': 'BR'
}

for col, value in additional_columns.items():
    if col not in listing_df.columns:
        listing_df[col] = value

# Rename the DataFrame to listing_dataframe if needed
listing_dataframe = listing_df.copy()

# Check the resulting DataFrame
listing_dataframe.head()


Unnamed: 0,_id,name,property_type,room_type,bed_type,cancellation_policy,last_scraped,calendar_last_scraped,first_review,last_review,...,location.is_location_exact,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,review_scores_rating,latitude,longitude
0,10021707,Private Room in Bushwick,Apartment,Private room,Real Bed,flexible,2019-03-06 05:00:00,2019-03-06 05:00:00,2016-01-31 05:00:00,2016-01-31 05:00:00,...,True,10.0,10.0,10.0,10.0,8.0,8.0,100.0,40.69791,-73.93615
1,10009999,Horto flat with small garden,Apartment,Entire home/apt,Real Bed,flexible,2019-02-11 05:00:00,2019-02-11 05:00:00,1970-01-01 00:00:00,1970-01-01 00:00:00,...,True,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-22.966254,-43.23075
2,10066928,3 chambres au coeur du Plateau,Apartment,Entire home/apt,Real Bed,flexible,2019-03-11 04:00:00,2019-03-11 04:00:00,1970-01-01 00:00:00,1970-01-01 00:00:00,...,True,0.0,0.0,0.0,0.0,0.0,0.0,0.0,45.52233,-73.57383
3,10069642,Ótimo Apto proximo Parque Olimpico,Apartment,Entire home/apt,Real Bed,strict_14_with_grace_period,2019-02-11 05:00:00,2019-02-11 05:00:00,1970-01-01 00:00:00,1970-01-01 00:00:00,...,False,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-23.000358,-43.431112
4,10047964,Charming Flat in Downtown Moda,House,Entire home/apt,Real Bed,flexible,2019-02-18 05:00:00,2019-02-18 05:00:00,2016-04-02 04:00:00,2016-04-02 04:00:00,...,True,10.0,10.0,10.0,10.0,10.0,10.0,100.0,40.98585,29.03133


In [7]:
listing_dataframe.columns

Index(['_id', 'name', 'property_type', 'room_type', 'bed_type',
       'cancellation_policy', 'last_scraped', 'calendar_last_scraped',
       'first_review', 'last_review', 'accommodates', 'bedrooms', 'beds',
       'number_of_reviews', 'bathrooms', 'amenities', 'extra_people',
       'guests_included', 'street', 'suburb', 'government_area', 'market',
       'country', 'country_code', 'location.type',
       'location.is_location_exact', 'review_scores_accuracy',
       'review_scores_cleanliness', 'review_scores_checkin',
       'review_scores_communication', 'review_scores_location',
       'review_scores_value', 'review_scores_rating', 'latitude', 'longitude'],
      dtype='object')

In [8]:
# Read the CSV file into a DataFrame
df = pd.read_csv('neighborhood_details.csv')

# Convert the 'address' column from string to dictionary
df['address'] = df['address'].apply(ast.literal_eval)

# Normalize the 'address' column
address_df = pd.json_normalize(df['address'])

# Concatenate the normalized DataFrame with the original DataFrame
neighbour_df = pd.concat([df.drop(columns=['address']), address_df], axis=1)

neighbour_df[['latitude', 'longitude']] = pd.DataFrame(neighbour_df['location.coordinates'].tolist(), index=neighbour_df.index)

# Drop the original 'location' column
neighbour_df.drop(columns=['location.coordinates'], inplace=True)

# Fill empty strings and NaN values in 'suburb' with "Not provided"
neighbour_df['suburb'].replace('', 'Not provided', inplace=True)
neighbour_df['suburb'].fillna('Not provided', inplace=True)

neighbour_df = neighbour_df.rename(columns={
    'country': 'neighbour_country',
    'country_code': 'neighbour_country_code',
    'latitude': 'neighbour_latitude',
    'longitude': 'neighbour_longitude',
    'suburb'   : 'neighbour_suburb',
    'street'   : 'neighbour_street', 
    'government_area'  : 'neighbour_government_area'
})

# Rename the DataFrame to neighbour_dataframe
neighbour_dataframe = neighbour_df.copy()

neighbour_dataframe.head()


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  neighbour_df['suburb'].replace('', 'Not provided', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  neighbour_df['suburb'].fillna('Not provided', inplace=True)


Unnamed: 0,_id,neighbour_street,neighbour_suburb,neighbour_government_area,market,neighbour_country,neighbour_country_code,location.type,location.is_location_exact,neighbour_latitude,neighbour_longitude
0,10021707,"Brooklyn, NY, United States",Brooklyn,Bushwick,New York,United States,US,Point,True,-73.93615,40.69791
1,10009999,"Rio de Janeiro, Rio de Janeiro, Brazil",Jardim Botânico,Jardim Botânico,Rio De Janeiro,Brazil,BR,Point,True,-43.23075,-22.966254
2,10066928,"Montréal, Québec, Canada",Le Plateau-Mont-Royal,Le Plateau-Mont-Royal,Montreal,Canada,CA,Point,True,-73.57383,45.52233
3,10069642,"Rio de Janeiro, Rio de Janeiro, Brazil",Recreio dos Bandeirantes,Recreio dos Bandeirantes,Rio De Janeiro,Brazil,BR,Point,False,-43.431112,-23.000358
4,10047964,"Kadıköy, İstanbul, Turkey",Moda,Kadikoy,Istanbul,Turkey,TR,Point,True,29.03133,40.98585


In [9]:
select_columns = ['_id', 'host_id', 'host_name', 'host_location', 'host_response_time', 'host_neighbourhood', 'host_response_rate', 'host_total_listings_count', 'host_is_superhost']

hosting_dataframe = hosting_dataframe[select_columns]

In [10]:
neighbour_selected = ['_id', 'neighbour_street', 'neighbour_suburb', 'neighbour_government_area', 'neighbour_country', 'neighbour_country_code', 'neighbour_latitude', 'neighbour_longitude']

neighbour_dataframe  = neighbour_dataframe[neighbour_selected]

In [13]:
# Selecting specific columns for each dataframe
select_columns = ['_id', 'host_id', 'host_name', 'host_location', 'host_response_time', 'host_neighbourhood', 'host_response_rate', 'host_total_listings_count', 'host_is_superhost']
hosting_dataframe = hosting_dataframe[select_columns]

neighbour_selected = ['_id', 'neighbour_street', 'neighbour_suburb', 'neighbour_government_area', 'neighbour_country', 'neighbour_country_code', 'neighbour_latitude', 'neighbour_longitude']

neighbour_dataframe  = neighbour_dataframe[neighbour_selected]

# Perform the merge step by step
merged_df = booking_dataframe.merge(pricing_dataframe, on='_id') \
                             .merge(hosting_dataframe, on='_id') \
                             .merge(listing_dataframe, on='_id') \
                             .merge(image_dataframe, on='_id') \
                             .merge(neighbour_dataframe, on='_id') 

print("Final merged DataFrame shape:", merged_df.shape)


Final merged DataFrame shape: (5555, 62)


In [14]:
final_dataframe=pd.DataFrame(merged_df)

final_dataframe.to_csv('final_dataframe.csv', index=False)

final_dataframe.head()


Unnamed: 0,_id,minimum_nights,maximum_nights,availability_30,availability_60,availability_90,availability_365,price,weekly_price,monthly_price,...,latitude,longitude,picture_url,neighbour_street,neighbour_suburb,neighbour_government_area,neighbour_country,neighbour_country_code,neighbour_latitude,neighbour_longitude
0,10021707,14,1125,0,0,0,0,40.0,0.0,0.0,...,40.69791,-73.93615,https://a0.muscache.com/im/pictures/72844c8c-f...,"Brooklyn, NY, United States",Brooklyn,Bushwick,United States,US,-73.93615,40.69791
1,10009999,2,1125,0,0,0,0,317.0,1492.0,4849.0,...,-22.966254,-43.23075,https://a0.muscache.com/im/pictures/5b408b9e-4...,"Rio de Janeiro, Rio de Janeiro, Brazil",Jardim Botânico,Jardim Botânico,Brazil,BR,-43.23075,-22.966254
2,10066928,1,1125,0,0,0,0,140.0,0.0,0.0,...,45.52233,-73.57383,https://a0.muscache.com/im/pictures/f208bdd7-b...,"Montréal, Québec, Canada",Le Plateau-Mont-Royal,Le Plateau-Mont-Royal,Canada,CA,-73.57383,45.52233
3,10069642,15,20,0,0,0,0,858.0,0.0,0.0,...,-23.000358,-43.431112,https://a0.muscache.com/im/pictures/5b1f4beb-6...,"Rio de Janeiro, Rio de Janeiro, Brazil",Recreio dos Bandeirantes,Recreio dos Bandeirantes,Brazil,BR,-43.431112,-23.000358
4,10047964,2,1125,27,57,87,362,527.0,0.0,0.0,...,40.98585,29.03133,https://a0.muscache.com/im/pictures/231120b6-e...,"Kadıköy, İstanbul, Turkey",Moda,Kadikoy,Turkey,TR,29.03133,40.98585


In [16]:
# Load the DataFrame
final_airbnb_df = pd.read_csv('final_dataframe.csv')

# Check the column names
print("Columns in the DataFrame:", final_airbnb_df.columns)

# Strip any whitespace from column names
final_airbnb_df.columns = final_airbnb_df.columns.str.strip()

# Rename columns to standardize names
final_airbnb_df = final_airbnb_df.rename(columns={
    'price_x': 'price',
    'price_y': 'extra_price',
    'suburb_x': 'suburb',
    'government_area_x': 'government_area'
})

# Ensure the required columns exist
required_columns = [
    'price', 'security_deposit', 'cleaning_fee', 'extra_price',
    'weekly_price', 'monthly_price', 'minimum_nights', 'maximum_nights'
]

for col in required_columns:
    if col not in final_airbnb_df.columns:
        final_airbnb_df[col] = 0

# Ensure the required columns have numeric values and handle missing values
numeric_columns = [
    'price', 'security_deposit', 'cleaning_fee', 
    'weekly_price', 'monthly_price'
]
final_airbnb_df[numeric_columns] = final_airbnb_df[numeric_columns].apply(pd.to_numeric, errors='coerce').fillna(0)

# Calculate booked nights as the sum of minimum and maximum nights
final_airbnb_df['booked_nights'] = final_airbnb_df['minimum_nights'] + final_airbnb_df['maximum_nights']

# Calculate total price
final_airbnb_df['total_price'] = (
    final_airbnb_df['price'] + 
    final_airbnb_df['security_deposit'] + 
    final_airbnb_df['cleaning_fee'] + 
    final_airbnb_df['weekly_price'] + 
    final_airbnb_df['monthly_price']
)

# Calculate total revenue
final_airbnb_df['total_revenue'] = final_airbnb_df['total_price'] * final_airbnb_df['booked_nights']

# Calculate total cost
final_airbnb_df['total_cost'] = final_airbnb_df['security_deposit'] + final_airbnb_df['cleaning_fee']

# Calculate net profit
final_airbnb_df['net_profit'] = final_airbnb_df['total_revenue'] - final_airbnb_df['total_cost']

# Assign conditions directly to weekly_price and monthly_price
final_airbnb_df['weekly_price_filter'] = final_airbnb_df['weekly_price'].apply(lambda x: '1000' if x > 1000 else f"{x:.2f}")
final_airbnb_df['monthly_price_filter'] = final_airbnb_df['monthly_price'].apply(lambda x: '3000' if x > 3000 else f"{x:.2f}")

# Define conditions for minimum and maximum prices
minimum_condition = final_airbnb_df['total_price'] <= 3000
maximum_condition = final_airbnb_df['total_price'] > 3000

# Create new columns based on conditions and convert NaN to 0 for maximum_price
final_airbnb_df['minimum_price'] = final_airbnb_df['total_price'].where(minimum_condition, other=0)
final_airbnb_df['maximum_price'] = final_airbnb_df['total_price'].where(maximum_condition, other=0)

# Format numeric columns to dollar format before converting to strings
price_columns = [
    'price', 'security_deposit', 'cleaning_fee', 'extra_price',
    'total_price', 'total_revenue', 'total_cost', 'net_profit', 'weekly_price', 'monthly_price'
]

for col in price_columns:
    final_airbnb_df[col] = final_airbnb_df[col].apply(lambda x: f"{x:.2f}")

# Save the DataFrame to a CSV file
final_airbnb_df.to_csv('final_airbnb_df.csv', index=False)

# Print a message to confirm the save
print("DataFrame saved to final_airbnb_df.csv")


Columns in the DataFrame: Index(['_id', 'minimum_nights', 'maximum_nights', 'availability_30',
       'availability_60', 'availability_90', 'availability_365', 'price',
       'weekly_price', 'monthly_price', 'cleaning_fee', 'security_deposit',
       'host_id', 'host_name', 'host_location', 'host_response_time',
       'host_neighbourhood', 'host_response_rate', 'host_total_listings_count',
       'host_is_superhost', 'name', 'property_type', 'room_type', 'bed_type',
       'cancellation_policy', 'last_scraped', 'calendar_last_scraped',
       'first_review', 'last_review', 'accommodates', 'bedrooms', 'beds',
       'number_of_reviews', 'bathrooms', 'amenities', 'extra_people',
       'guests_included', 'street', 'suburb', 'government_area', 'market',
       'country', 'country_code', 'location.type',
       'location.is_location_exact', 'review_scores_accuracy',
       'review_scores_cleanliness', 'review_scores_checkin',
       'review_scores_communication', 'review_scores_location'

In [18]:
final_airbnb_df.to_excel("airbnb_dashboard.xlsx", index=False)  # Set index=False to exclude row numbers in Excel