In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import scipy.stats as stats
from scipy.stats import ttest_ind, shapiro, mannwhitneyu, chi2_contingency

from google.oauth2 import service_account
from googleapiclient.discovery import build

import folium
from folium.plugins import MarkerCluster

# Standard-library helpers used by the configuration below
import os
import warnings

# Show every column when a DataFrame is displayed (no column truncation)
pd.set_option('display.max_columns', None)

# Silence FutureWarning messages so they do not clutter cell output
warnings.simplefilter(action='ignore', category=FutureWarning)

# Path to the downloaded Service Account JSON key file.
# Set the GOOGLE_APPLICATION_CREDENTIALS environment variable to run this
# notebook on another machine; when it is unset or empty, the original local
# path below is used as the fallback.
json_key_file = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS') or (
    '/Users/timursalakhetdinov/Applied Statistical Analysis/4. Distribution/banded-anvil-309310-3642b97f7801.json'
)

In [2]:
# Spreadsheet ID of the source Google Sheets document
spreadsheet_id = '1ecopK6oyyb4d_7-QLrCr8YlgFrCetHU7-VQfnYej7JY'

# Build read-only credentials from the service-account JSON key file
creds = service_account.Credentials.from_service_account_file(
    json_key_file, scopes=["https://www.googleapis.com/auth/spreadsheets.readonly"]
)

# Client for version 4 of the Sheets API, authorised with those credentials
service = build('sheets', 'v4', credentials=creds)

# Fetch the spreadsheet metadata, then collect the title of every sheet in it
spreadsheet = (
    service.spreadsheets()
    .get(spreadsheetId=spreadsheet_id)
    .execute()
)
sheet_entries = spreadsheet['sheets']
sheet_names = [entry['properties']['title'] for entry in sheet_entries]

# Load each sheet into a DataFrame and tag its rows with the city name and
# day type encoded in the sheet title ("<city>_<day type>").
all_sheets = []
for sheet_name in sheet_names:
    # Fetch the whole sheet; the rows come back as a list of lists under 'values'
    result = service.spreadsheets().values().get(spreadsheetId=spreadsheet_id, range=sheet_name).execute()
    values = result.get('values', [])

    # Nothing to load when the response carries no rows
    if not values:
        continue

    # The first row is assumed to be the header. The Sheets API omits trailing
    # empty cells, so a data row can be shorter than the header; pad such rows
    # with None so the row width always matches the header width.
    header, rows = values[0], values[1:]
    rows = [row + [None] * (len(header) - len(row)) for row in rows]
    df = pd.DataFrame(rows, columns=header)

    # Drop the first column, which contains the index. This is done by
    # position so that no other column sharing its label is removed with it.
    df = df.iloc[:, 1:].copy()

    # Extract city name and day type from the sheet name, splitting on the
    # LAST underscore so that city names containing underscores still parse.
    city_name, separator, day_type = sheet_name.rpartition('_')
    if not (separator and city_name and day_type):
        raise ValueError(
            f"Sheet name {sheet_name!r} does not match the expected '<city>_<day type>' pattern"
        )
    df['city'] = city_name.capitalize()
    df['day_type'] = day_type.capitalize()  # Capitalize to get 'Weekdays' or 'Weekend'

    # Keep the tagged frame for the final concatenation
    all_sheets.append(df)

# pd.concat cannot combine an empty list; fail with an explicit message instead
if not all_sheets:
    raise ValueError("No sheet data was loaded from the spreadsheet; nothing to combine")

# Combine all sheets into a single DataFrame with a fresh 0..n-1 index
combined_df = pd.concat(all_sheets, ignore_index=True)

# Display the first rows of the combined DataFrame
display(combined_df.head())

Unnamed: 0,realSum,room_type,room_shared,room_private,person_capacity,host_is_superhost,multi,biz,cleanliness_rating,guest_satisfaction_overall,bedrooms,dist,metro_dist,attr_index,attr_index_norm,rest_index,rest_index_norm,lng,lat,city,day_type
0,194.0336981,Private room,False,True,2,False,1,0,10,93,1,5.022963798,2.539380003,78.69037927,4.166707868,98.25389587,6.846472824,4.90569,52.41772,Amsterdam,Weekdays
1,344.245776,Private room,False,True,4,False,0,0,8,85,1,0.4883892888,0.2394039228,631.1763783,33.42120862,837.2807567,58.34292774,4.90005,52.37432,Amsterdam,Weekdays
2,264.1014224,Private room,False,True,2,False,0,1,9,87,1,5.748311915,3.651621289,75.27587691,3.9859077,95.38695493,6.646700255,4.97512,52.36103,Amsterdam,Weekdays
3,433.529398,Private room,False,True,4,False,0,1,9,90,2,0.3848620128,0.4398760761,493.2725344,26.11910845,875.0330976,60.97356517,4.89417,52.37663,Amsterdam,Weekdays
4,485.5529257,Private room,False,True,2,True,0,0,10,98,1,0.5447381834,0.3186926468,552.8303244,29.272733,815.30574,56.81167696,4.90051,52.37508,Amsterdam,Weekdays
