In [100]:
# !git clone https://github.com/AshishJangra27/datasets

# Agenda | Top 10 Airbnbs of Manali under 5000

1. Rating
2. Amenities
3. Reviews

### 1. Exploratory Data Analysis

In [88]:
import pandas as pd

data = pd.read_csv('/content/datasets/Airbnb/airbnb_v2.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,id,name,rating,reviews,host_name,host_id,address,features,amenities,...,price,country,bathrooms,beds,guests,toiles,bedrooms,studios,checkin,checkout
0,0,49849504,Perla bungalov,4.71,64,Mehmetcan,357334205.0,"Kartepe, Kocaeli, Turkey","2 guests,2 bedrooms,1 bed,1 bathroom","Mountain view,Valley view,Lake access,Kitchen,...",...,8078,Turkey,1,1,2,0,2,0,Flexible,12 00 pm
1,1,50891766,Authentic Beach Architect Sheltered Villa with...,New,0,Fatih,386223873.0,"Kaş, Antalya, Turkey","4 guests,2 bedrooms,2 beds,2 bathrooms","Kitchen,Wifi,Dedicated workspace,Free parking ...",...,4665,Turkey,2,2,4,0,2,0,4 00 pm - 11 00 pm,10 00 am
2,2,50699164,cottages sataplia,4.85,68,Giorgi,409690853.0,"Imereti, Georgia","4 guests,1 bedroom,3 beds,1 bathroom","Mountain view,Kitchen,Wifi,Dedicated workspace...",...,5991,Georgia,1,3,4,0,1,0,After 1 00 pm,12 00 pm
3,3,49871422,Sapanca Breathable Bungalow,5.0,13,Melih,401873242.0,"Sapanca, Sakarya, Turkey","4 guests,1 bedroom,2 beds,1 bathroom","Mountain view,Valley view,Kitchen,Wifi,Free pa...",...,11339,Turkey,1,2,4,0,1,0,After 2 00 pm,12 00 pm
4,4,51245886,Bungalov Ev 2,New,0,Arp Sapanca,414884116.0,"Sapanca, Sakarya, Turkey","2 guests,1 bedroom,1 bed,1 bathroom","Kitchen,Wifi,Free parking on premises,TV,Air c...",...,6673,Turkey,1,1,2,0,1,0,After 2 00 pm,12 00 pm


#### 1.1) Checking basic detials

In [89]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12805 entries, 0 to 12804
Data columns (total 23 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Unnamed: 0    12805 non-null  int64  
 1   id            12805 non-null  int64  
 2   name          12805 non-null  object 
 3   rating        12805 non-null  object 
 4   reviews       12805 non-null  object 
 5   host_name     12797 non-null  object 
 6   host_id       12805 non-null  float64
 7   address       12805 non-null  object 
 8   features      12805 non-null  object 
 9   amenities     12805 non-null  object 
 10  safety_rules  12805 non-null  object 
 11  hourse_rules  12805 non-null  object 
 12  img_links     12805 non-null  object 
 13  price         12805 non-null  int64  
 14  country       12805 non-null  object 
 15  bathrooms     12805 non-null  int64  
 16  beds          12805 non-null  int64  
 17  guests        12805 non-null  int64  
 18  toiles        12805 non-nu

#### 1.2) Check null values

In [90]:
data.isnull().sum()

Unnamed: 0,0
Unnamed: 0,0
id,0
name,0
rating,0
reviews,0
host_name,8
host_id,0
address,0
features,0
amenities,0


#### 1.3) Check Duplicates

In [91]:
len(data) - data['id'].nunique()

0

#### 1.4) Remove un-necessary columns

In [92]:
del data['Unnamed: 0']
del data['host_name']
del data['host_id']
del data['img_links']
del data['checkin']
del data['checkout']

### 2. Data Cleaning

#### 2.1) Cleaning Country Column

In [93]:
data['country'] = data['country'].str.strip()

#### 2.2) Taking only the ones of India

In [94]:
data = data[data['country'] == 'India']

#### 2.3) Creating cities column

In [95]:
data['cities'] = data['address'].str.split(',').str[0]

#### 2.4) Taking only the ones of Manali adn has price less than 5000

In [96]:
data = data[data['cities'] == 'Manali']
data = data[data['price'] < 5000]

### 3. Analyse the Data

#### 3.1) Finding top 10 based on Rating

In [97]:
data['rating'] = data['rating'].str.replace('New','0')
data['rating'] = data['rating'].astype('float')
data.sort_values(by='rating', ascending=False).head(10).to_csv('top_10_airbnb_rating.csv')

#### 3.2) Finding top 10 based on Reviews

In [98]:
data['reviews'] = data['reviews'].astype('int')

data.sort_values(by='reviews', ascending=False).head(10).to_csv('top_10_airbnb_reviews.csv')

#### 3.3) Finding top 10 based on Reviews as well as Rating

In [101]:
data['rating_and_review_filter'] = data['rating'] * data['reviews']

data.sort_values(by = 'rating_and_review_filter', ascending = False).head(10)

Unnamed: 0,id,name,rating,reviews,address,features,amenities,safety_rules,hourse_rules,price,country,bathrooms,beds,guests,toiles,bedrooms,studios,cities,rating_and_review_filter
11993,6906342,The Abode - Shobla Pine Chalet - Room with bal...,4.72,105,"Manali, Himachal Pradesh, India","3 guests,1 bedroom,1 bed,1 private bathroom","Mountain view,Waterfront,Wifi,Free parking on ...","󹀁,Airbnb's COVID-19 safety practices apply,󱠃,N...","Check-in: After 12:00 pm,Check out: 11:00 am,S...",2853,India,1,1,3,0,1,0,Manali,495.6
5369,34613162,Quaint and Cozy rooms - Mokshastays,4.97,71,"Manali, Himachal Pradesh, India","2 guests,1 bedroom,1 bed,1.5 bathrooms","Mountain view,Valley view,Wifi,Free parking on...","󹀁,Airbnb's COVID-19 safety practices apply,󱠃,N...","Check-in: 12:00 pm - 10:00 pm,Check out: 11:00...",3138,India,1,1,2,0,1,0,Manali,352.87
1794,30724547,Suite - Cozy and comfortable for Friends & Family,4.81,59,"Manali, Himachal Pradesh, India","5 guests,1 bedroom,2 beds,1 private bathroom","Wifi,Dedicated workspace,Free parking on premi...","󹀁,Airbnb's COVID-19 safety practices apply,󱠃,S...","Check-in: 12:00 pm - 2:00 pm,Check out: 11:00 ...",1610,India,1,2,5,0,1,0,Manali,283.79
1784,24978026,Apple Orchard House,4.78,56,"Manali, Himachal Pradesh, India","2 guests,1 bedroom,1 bed,1 bathroom","Mountain view,Valley view,Kitchen,Pets allowed...","󹀁,Airbnb's COVID-19 safety practices apply,󱠃,N...","Check-in: Flexible,Check out: 1:00 am,No smoki...",913,India,1,1,2,0,1,0,Manali,267.68
5377,32545702,"Beautiful & Peaceful Snow River, Manali",4.81,53,"Manali, Himachal Pradesh, India","12 guests,4 bedrooms,4 beds,4.5 bathrooms","Mountain view,River view,Waterfront,Wifi,Free ...","󹀁,Airbnb's COVID-19 safety practices apply,󱠃,N...","Check-in: After 12:00 pm,Check out: 11:00 am,S...",1162,India,4,4,12,0,4,0,Manali,254.93
3173,13185211,The Abode - Shobla Pine Chalet - Room with bal...,4.75,52,"Manali, Himachal Pradesh, India","3 guests,1 bedroom,1 bed,1 private bathroom","Waterfront,Wifi,Free parking on premises,Pets ...","󹀁,Airbnb's COVID-19 safety practices apply,󱠃,N...","Check-in: After 12:00 pm,Check out: 11:00 am,P...",2853,India,1,1,3,0,1,0,Manali,247.0
5403,48928829,Mountain view Ride Inn Deluxe room in Manali,4.9,49,"Manali, Himachal Pradesh, India","3 guests,1 bedroom,1 bed,1 private bathroom","Wifi,Free parking on premises,TV with standard...","󹀁,Airbnb's COVID-19 safety practices apply,󱠃,N...","Check-in: After 1:00 pm,Check out: 11:00 am,No...",2199,India,1,1,3,0,1,0,Manali,240.1
5407,48819740,"Beas River & Mt. View Superior Rm Ride Inn, Ma...",4.83,49,"Manali, Himachal Pradesh, India","3 guests,1 bedroom,1 bed,1 private bathroom","Wifi,Free driveway parking on premises,TV with...","󹀁,Airbnb's COVID-19 safety practices apply,󱠃,N...","Check-in: After 1:00 pm,Check out: 11:00 am,No...",2742,India,1,1,3,0,1,0,Manali,236.67
5359,50716559,"Luxurious Room With A Hottub, Olive Manali Cot...",4.82,45,"Manali, District- Kullu, Himachal Pradesh, India","2 guests,1 bedroom,1 bed,1 private bathroom","Mountain view,Valley view,Waterfront,Wifi,Dedi...","󹀁,Airbnb's COVID-19 safety practices apply,󱠃,N...","Check-in: 12:00 pm - 2:00 pm,Check out: 10:00 ...",2853,India,1,1,2,0,1,0,Manali,216.9
1806,31061284,Cider Chalet-E | 2BRK Apartment-SnowMountain View,4.65,43,"Manali, Himachal Pradesh, India","6 guests,2 bedrooms,2 beds,2 bathrooms","Mountain view,Garden view,Waterfront,Kitchen,W...","󹀁,Airbnb's COVID-19 safety practices apply,󱠃,N...","Check-in: After 1:00 pm,Check out: 10:00 am,Se...",3278,India,2,2,6,0,2,0,Manali,199.95


### 4. Top 10 Airbnbs in India based on Rating*Reviews

In [131]:
import pandas as pd
from tqdm.auto import tqdm


df = pd.read_csv('/content/datasets/Airbnb/airbnb_v2.csv')
df['country'] = df['country'].str.strip()
countries = df['country'].unique()


for country in tqdm(countries):

  df = pd.read_csv('/content/datasets/Airbnb/airbnb_v2.csv')

  df['country'] = df['country'].str.strip()
  df = df[df['country'] == country]

  df['rating'] = df['rating'].str.replace('New','0')
  df['rating'] = df['rating'].astype('float')

  df['reviews'] = df['reviews'].str.replace(',','').astype('int')

  df['rating_and_review_filter'] = df['rating'] * df['reviews']

  df.sort_values(by = 'rating_and_review_filter', ascending = False).head(10).to_csv('csvs/' + country + '.csv')

  0%|          | 0/119 [00:00<?, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['rating'] = df['rating'].str.replace('New','0')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['rating'] = df['rating'].astype('float')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['reviews'] = df['reviews'].str.replace(',','').astype('int')
A value is trying to be set on a copy of a sli

### 5. Saving the Zip

In [147]:
import zipfile
import os

def zip_folder(folder_path, output_path):

  with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
    for root, _, files in os.walk(folder_path):
      for file in files:
        file_path = os.path.join(root, file)
        arcname = os.path.relpath(file_path, folder_path)
        zipf.write(file_path, arcname)


folder_path = 'csvs'  # Replace with the actual folder path
output_path = 'csvs.zip'  # Replace with the desired output path
zip_folder(folder_path, output_path)