# Exploratory Data Analysis

## Importing libraries and creating a dataframe

In [1]:
from pathlib import Path
import pandas as pd

In [2]:
# Location of the hotel listings CSV, resolved under the current user's home directory.
file_path = Path.home().joinpath('PycharmProjects/pythonProject/accra_avail_hotels.csv')

In [3]:
# Load the hotel listings CSV into a DataFrame, decoding the file as Latin-1.
df = pd.read_csv(filepath_or_buffer=file_path, encoding='latin1')

## Cleaning the data

In [4]:
# Display the frame exactly as loaded from the CSV, before any cleaning is applied.
df

Unnamed: 0,Name,Price,Rating
0,Lancaster Accra,137200,7.6
1,Accra Luxury Homes @ East Legon,78719,9.6
2,Ibis Styles Accra Airport,113190,7.3
3,Villa Monticello Boutique Hotel,220412,8.0
4,Airport View Hotel,89875,7.2
...,...,...,...
347,Movinpick Ambassador Hotel,181276,
348,East Airport Suites4 -Private room,73745,9.3
349,RT Properties 1 bedroom duplex @The gardens,205800,
350,East Airport Suite 1,73745,7.3


In [5]:
# Convert the Price column to a number by keeping only its digits.
# Every non-digit character is stripped and the remaining digits are read as one whole
# number. The pattern is a raw string so that "\D" reaches the regex engine instead of
# being parsed as an (invalid) Python string escape.
price_digits = df['Price'].astype('str').str.replace(r'\D+', '', regex=True)
# A value that contains no digits at all ends up as NaN rather than raising.
df['Price'] = pd.to_numeric(price_digits, errors='coerce')

In [6]:
# Make sure Rating is numeric and record listings that have no rating as 0.0.
# pd.to_numeric leaves an already-numeric column unchanged and converts numeric strings
# if the CSV column was read as object; a value that cannot be parsed raises ValueError here.
# NOTE: 0.0 therefore stands for "no rating available", not an actual score of zero.
df['Rating'] = pd.to_numeric(df['Rating']).fillna(0.0)

In [7]:
# Display the cleaned frame: Price has been rebuilt from its digits and missing
# Rating values have been replaced with 0.0.
df

Unnamed: 0,Name,Price,Rating
0,Lancaster Accra,137200,7.6
1,Accra Luxury Homes @ East Legon,78719,9.6
2,Ibis Styles Accra Airport,113190,7.3
3,Villa Monticello Boutique Hotel,220412,8.0
4,Airport View Hotel,89875,7.2
...,...,...,...
347,Movinpick Ambassador Hotel,181276,0.0
348,East Airport Suites4 -Private room,73745,9.3
349,RT Properties 1 bedroom duplex @The gardens,205800,0.0
350,East Airport Suite 1,73745,7.3


## Analysing the data

### List of affordable hotels available in Accra between Sept 16 and 18, 2022, with a good-to-excellent rating

In [8]:
# Build the shortlist that answers the question above: keep a listing only when its
# price is strictly below 100000 and its rating is 6 or higher.
is_affordable = df['Price'].lt(100000)
is_well_rated = df['Rating'].ge(6)
result_df = df[is_affordable & is_well_rated]

In [9]:
# Order the shortlist by rating (highest first); listings with the same rating are
# ordered by price (lowest first).
sort_keys = ['Rating', 'Price']
sort_ascending = [False, True]
result_df = result_df.sort_values(by=sort_keys, ascending=sort_ascending)

In [10]:
# Display the filtered shortlist, ordered by rating (highest first) and then by
# price (lowest first).
result_df

Unnamed: 0,Name,Price,Rating
157,The Prestige Homes,30013,10.0
72,Glorious Luxury Home at Gallery Apartments,67914,10.0
122,The VVIP Luxury Apartments@The Gallery,92610,10.0
127,The VVIP Luxury Apartments@The Gallery,92610,10.0
107,Tricia's PLACE,97755,10.0
...,...,...,...
248,"SPACIOUS APARTMENT, SLEEPS UP TO 8 PEOPLE, 2 B...",61954,6.4
261,"SPACIOUS APARTMENT, SLEEPS UP TO 8 PEOPLE, 2 B...",61954,6.4
65,SBJ Place,43990,6.0
180,PLSA -East legon,77175,6.0


In [11]:
# Remove rows that repeat an earlier row exactly (same value in every column);
# the first occurrence of each is the one that is kept.
is_repeat = result_df.duplicated(keep='first')
result_df = result_df[~is_repeat]

In [12]:
# Display the shortlist after exact duplicate rows have been dropped.
result_df

Unnamed: 0,Name,Price,Rating
157,The Prestige Homes,30013,10.0
72,Glorious Luxury Home at Gallery Apartments,67914,10.0
122,The VVIP Luxury Apartments@The Gallery,92610,10.0
107,Tricia's PLACE,97755,10.0
1,Accra Luxury Homes @ East Legon,78719,9.6
...,...,...,...
221,Liana Hotel,48878,6.4
248,"SPACIOUS APARTMENT, SLEEPS UP TO 8 PEOPLE, 2 B...",61954,6.4
65,SBJ Place,43990,6.0
180,PLSA -East legon,77175,6.0


In [13]:
# Write the shortlist to a UTF-8 encoded CSV file (path is relative to the current
# working directory); the row index is not written.
output_name = 'accra_top_rated_afford_hotel.csv'
result_df.to_csv(output_name, encoding='utf-8', index=False)