In [None]:
#Necessary packages
import numpy as np
import pandas as pd
from numpy import save
from numpy import load
import random
import matplotlib.pyplot as plt
from scipy.stats import wishart
from numpy.random import multivariate_normal
from tqdm import tqdm
import math
from matplotlib import cm
from sklearn.metrics import mean_squared_error, mean_absolute_error

import seaborn as sns

Loading datasets

In [None]:
# Read the books, ratings and users tables from the CSV files under Data/.
book_df, ratings_df, user_df = map(
    pd.read_csv,
    ('Data/Books.csv', 'Data/Ratings.csv', 'Data/Users.csv'),
)

### Data summary of files:

In [None]:
# Size of the raw tables. The "matrix" is the full user x book grid, so its size is
# the product of the two table lengths; the ratings table holds the filled entries.
print("Number of books (items):", len(book_df))
# Users are not items: the "(items)" suffix was copied from the line above.
print("Number of users:", len(user_df))
print("The matrix will have size", len(book_df)*len(user_df))
print("Where there are", len(ratings_df), "entries")

## Data preprocessing

In [None]:
# Join every rating with the attributes of its user, then with the metadata of the
# rated book (inner joins on the shared key columns).
# The first result has to be called user_rating_df: the second merge reads that name.
user_rating_df = ratings_df.merge(user_df, on='User-ID')
book_user_rating = book_df.merge(user_rating_df, on='ISBN')
book_user_rating = book_user_rating[
    ['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher',
     'Location', 'Age', 'User-ID', 'Book-Rating']
].reset_index(drop=True)

#Removing all implicit ratings
book_user_rating = book_user_rating[book_user_rating['Book-Rating'] > 0]

#Removing all books with 1 rating, and users that have rated 1 item
# Work on the value_counts Series directly: wrapping it in a DataFrame and indexing
# by "ISBN"/"User-ID" depends on how the counts column is named, which changed in
# pandas 2.0 (it is called "count" there).
# Both counts are taken once, before either filter is applied.
book_counts = book_user_rating["ISBN"].value_counts()
User_counts = book_user_rating["User-ID"].value_counts()
User = User_counts[User_counts <= 1].index
rare_book = book_counts[book_counts <= 1].index
Sorted_rating = book_user_rating[~book_user_rating['ISBN'].isin(rare_book)]
# .copy() so the assignments below write to an independent frame, not to a slice.
Sorted_rating = Sorted_rating[~Sorted_rating["User-ID"].isin(User)].copy()

#Some manual changes:
# Use the real column names of this frame. The previous names (yearOfPublication,
# bookAuthor, publisher, bookTitle) do not exist here, so the assignments created four
# new, almost empty columns instead of correcting the record.
is_fixed_book = Sorted_rating.ISBN == '2070426769'
Sorted_rating.loc[is_fixed_book, 'Year-Of-Publication'] = 2003
Sorted_rating.loc[is_fixed_book, 'Book-Author'] = "Jean-Marie Gustave Le ClÃ?Â©zio"
Sorted_rating.loc[is_fixed_book, 'Publisher'] = "Gallimard"
Sorted_rating.loc[is_fixed_book, 'Book-Title'] = "Peuple du ciel, suivi de 'Les Bergers"
Sorted_rating.loc[is_fixed_book, :]

In [None]:
#Reindexing the book and user ID
# Dense 0-based key per distinct ISBN, numbered in order of first appearance.
d = {isbn: key for key, isbn in enumerate(Sorted_rating.ISBN.unique())}
Sorted_rating['ID-Book-key'] = Sorted_rating['ISBN'].map(d)

# Same numbering scheme for users; `d` is rebound and holds the user mapping afterwards.
d = {user_id: key for key, user_id in enumerate(Sorted_rating['User-ID'].unique())}
Sorted_rating['ID-User-key'] = Sorted_rating['User-ID'].map(d)


In [None]:
#Statistics after initial preprocess
# Distinct users and books left after the filtering above.
n_users = Sorted_rating['ID-User-key'].nunique()
n_items = Sorted_rating['ID-Book-key'].nunique()
print('Number of users: ', n_users)
print('Number of Items: ', n_items)

## Exploratory data analysis:

In [None]:
#Plotting histogram of all explicit ratings, and find the average rating:
# plt.get_cmap is used instead of cm.get_cmap, which recent matplotlib releases removed.
viridis = plt.get_cmap('viridis', 20)

# 20 RGBA rows, sampled from the end of the colormap to its start.
colors = viridis(np.linspace(1, 0, 20))

mean_rating = Sorted_rating['Book-Rating'].mean()

plt.style.use('ggplot')
# Bin edges sit on the half-integers 0.5 .. 10.5, so each bar is centred on one rating value.
plt.hist(Sorted_rating['Book-Rating'], bins=np.arange(0.5, 11.5, 1), color=colors[16], edgecolor=colors[9])
plt.xticks(np.arange(0, 10+1, 1.0))
plt.xlabel('Ratings')
# The legend text is derived from the data so it cannot drift from the plotted line.
plt.axvline(x=mean_rating, color=colors[2], ls='--', label=f'Average = {mean_rating:.3f}')
plt.legend(loc="upper left", facecolor='white')
plt.savefig('Figures/Hist_ratings.png')

mean_rating

In [None]:
#Most-rated books:
# NOTE(review): the variable is called top10 but only the 5 most-rated books are kept
# (head(5)) -- confirm which size the table is meant to have.

# Number of explicit ratings per book.
ratings_count = Sorted_rating.groupby('ISBN', as_index=False)['Book-Rating'].count()

top10 = ratings_count.sort_values('Book-Rating', ascending=False).head(5)

# 'Book_sum' holds the number of ratings of the book (a count, despite the name).
top10.columns = ['ISBN', 'Book_sum']
top_rated_books = pd.merge(top10, book_df, on='ISBN', how='left')

# Mean explicit rating per book; merged in under the column name 'Book-Rating'.
ratings_mean = Sorted_rating.groupby('ISBN', as_index=False)['Book-Rating'].mean()

top_rated_books = pd.merge(top_rated_books, ratings_mean, on='ISBN', how='left')
# Keep the result: reset_index(..., inplace=False) returns a new frame, so the bare
# call that used to stand here had no effect.
top_rated_books = top_rated_books.reset_index(drop=True)
print(top_rated_books[['Book-Title','Book-Author','Book_sum','Book-Rating']].to_latex(index=False,  multirow = True))

In [None]:
#Investigating publication year
# NOTE(review): the preprocessing cell further down casts this column via str -> int,
# which suggests it may still hold year strings here -- compare on a numeric copy so
# the filters work either way (values that are not numbers become NaN in the copy only).
publication_year = pd.to_numeric(Sorted_rating['Year-Of-Publication'], errors='coerce')

# Only the last expression of a cell is rendered; the two lookups below are evaluated
# but not shown. Move them to their own cells to inspect them.
Sorted_rating['Year-Of-Publication'].unique()

# Books whose publication year is recorded as 0.
book_df.loc[book_df['ISBN'].isin(Sorted_rating[publication_year == 0]['ISBN'].tolist())]

# Restrict the plot to years in (1960, 2005].
in_plot_range = (publication_year > 1960) & (publication_year <= 2005)
y1 = Sorted_rating[in_plot_range]

# Define the colour locally: `viridis` is a colormap object at this point of a fresh
# run (it only becomes a list of colours in a later cell), so viridis[0] fails here.
year_color = sns.color_palette("viridis", 10)[0]
# histplot(kde=True, stat="density") is the replacement for the deprecated sns.distplot.
sns.histplot(publication_year[in_plot_range], kde=True, stat="density", color=year_color)
plt.xlabel('Year Of Publication',size=14)
plt.savefig('Figures/Hist_year.png')

In [None]:
#Preprocess of publication year
# Cast first, so that every comparison below is a numeric one.
Sorted_rating['Year-Of-Publication'] = Sorted_rating['Year-Of-Publication'].astype(str).astype(int)

# One row per (year, book) pair. The two lookups are evaluated but not rendered,
# because only the last expression of a cell is shown.
new_df = Sorted_rating.groupby(['Year-Of-Publication','ISBN'], as_index=False).size()
new_df[new_df['Year-Of-Publication']==0] #906 that we do not know
new_df[new_df['Year-Of-Publication']>2004] #12

# Replace unknown (0) and too-late (> 2005) years by the most frequent valid year.
# The previous .replace({'0': ...}) looked for the *string* '0' after the column had
# been cast to int, so year 0 was never replaced.
publication_year = Sorted_rating['Year-Of-Publication']
is_invalid_year = (publication_year == 0) | (publication_year > 2005)
most_common_year = publication_year[~is_invalid_year].value_counts().idxmax()
Sorted_rating.loc[is_invalid_year, 'Year-Of-Publication'] = most_common_year


In [None]:
#Investigating top publishers:

# One row per distinct (publisher, book) pair, then the number of books per publisher.
new_df = Sorted_rating.groupby(['Publisher', 'ISBN'], as_index=False).size()
ratings_count = new_df.groupby('Publisher', as_index=False)['ISBN'].count()
top15 = ratings_count.sort_values('ISBN', ascending=False).head(15)
viridis = sns.color_palette("viridis", 15)

fig, ax = plt.subplots(figsize=(6, 6))

sns.barplot(data=top15, x="ISBN", y="Publisher", palette=viridis, ax=ax)
sns.set()
ax.set_xlabel("No. of Books Published")
# write each bar's value inside the bar
ax.bar_label(ax.containers[0], label_type='center', color='w')

fig.tight_layout()
fig.savefig('Figures/Publishers.png')


In [None]:
#Investigating of age of users:
# One row per (user, location, age) combination. groupby drops rows whose key is
# missing, so users without a Location or an Age do not appear in new_df.
new_df = Sorted_rating.groupby(['ID-User-key', 'Location','Age'], as_index=False).count()

# Only print() output and the last expression of a cell are rendered; the bare
# lookups below are evaluated but not shown.
new_df['Location'].isna().sum()

new_df[new_df['Age']>95].shape
a = new_df['Age'].unique()
print(sorted(a))
new_df[new_df['Age']<15].shape

viridis = sns.color_palette("viridis", 10)

# Plot only ages in (0, 95].
y1 = new_df[(new_df['Age'] <= 95) & (new_df['Age'] > 0)]
# histplot(kde=True, stat="density") is the replacement for the deprecated sns.distplot.
sns.histplot(y1['Age'], kde=True, stat="density", color=viridis[0])
plt.xlabel('Age',size=14)
plt.savefig('Figures/Hist_age_alt.png')

In [None]:
#Preprocess of age:
# 1) cap ages above 90, 2) fill missing ages with the most frequent (capped) age,
# 3) raise ages below 15 to 15 -- in that order.
capped_age = Sorted_rating['Age'].clip(upper=90)
most_common_age = capped_age.value_counts().idxmax()
Sorted_rating['Age'] = capped_age.fillna(most_common_age).clip(lower=15)

In [None]:
#preprocess of location:
# Split the comma-separated Location string and keep its first three parts as
# city / state / country. A missing Location becomes the text 'nan' in city, and parts
# that are absent are left missing. Only the first part has no leading space, because
# the split is on "," alone.
# The vectorised split replaces a row-wise apply(pd.Series), which was very slow and
# only worked when the widest Location had exactly five parts.
location_parts = (
    Sorted_rating['Location']
    .astype(str)
    .str.split(',', expand=True)
    .reindex(columns=range(3))  # guarantees columns 0..2 exist even for short splits
)
Sorted_rating['city'] = location_parts[0]
Sorted_rating['state'] = location_parts[1]
Sorted_rating['country'] = location_parts[2]
Sorted_rating.drop(['Location'], axis=1, inplace=True)

# Per-user counts, then summed per country. numeric_only keeps the text columns
# (city, state) out of the sum.
new_df = Sorted_rating.groupby(['ID-User-key', 'city','state', 'country'], as_index=False).count()
new = new_df.groupby(['country'], as_index=False).sum(numeric_only=True)
#Printing to excel to map city to country, in order to create the below code
new.to_excel('output.xlsx', index=False)


In [None]:
#Manually setting country of following:
# Values in 'state' and 'country' carry a leading space (they come from splitting the
# location on ","), values copied from 'city' do not -- the keys below rely on that.

# Fill unusable country values from the other location parts: an empty country takes
# the state; whatever is still empty, blank, ' n/a' or missing takes the city.
is_empty_country = Sorted_rating['country'] == ''
Sorted_rating.loc[is_empty_country, 'country'] = Sorted_rating.loc[is_empty_country, 'state']
for placeholder in ('', ' ', ' n/a'):
    is_placeholder = Sorted_rating['country'] == placeholder
    Sorted_rating.loc[is_placeholder, 'country'] = Sorted_rating.loc[is_placeholder, 'city']
is_missing_country = Sorted_rating['country'].isna()
Sorted_rating.loc[is_missing_country, 'country'] = Sorted_rating.loc[is_missing_country, 'city']

# Per-user overrides. The keys are ID-User-key values, so they are only valid for the
# numbering produced by the re-indexing cell on the same input data.
Sorted_rating.loc[Sorted_rating['ID-User-key'].isin([1, 20855, 23222, 23844]), 'country'] = ' usa'
Sorted_rating.loc[Sorted_rating['ID-User-key'] == 25852, 'country'] = ' england'
Sorted_rating.loc[Sorted_rating['ID-User-key'] == 23705, 'country'] = ' usa'

# (value found in 'country', replacement), applied one after the other in this order.
# The order matters: some replacements feed later ones (' cambridgeshire' -> ' england'
# -> ' uk', ' euskal herria' -> ' basque country' -> ' spain').
# Changes against the previous statement list:
#   * ' district of columbia' went to ' columbia' and from there to ' colombia'; now ' usa'
#   * ' new hampshire' was ' canada', ' ontario' was ' usa'; swapped to the right country
#   * ' k1c7b1' has the format of a Canadian postal code; was ' usa', now ' canada'
#   * targets with stray characters ('spain', 'england', 'usa ', ' usa ', ' france ',
#     ' usa#', ' cape verde ') are written in their clean form directly
#   * exact duplicate statements and the no-op ' columbia' -> ' columbia' are dropped
# NOTE(review): ' ireland', ' co. limerick' and ' wicklow' are folded into ' uk' here,
# while the city fallback maps 'dublin' to ' ireland' -- confirm which is intended.
COUNTRY_FIXES = (
    (' us', ' usa'),
    (' everywhere and anywhere', ' usa'),
    (' america', ' usa'),
    (' cambridgeshire', ' england'),
    (' california', ' usa'),
    (' catalunya', ' spain'),
    (' catalonia', ' spain'),
    (' catalunya spain', ' spain'),
    (' a coruña', ' spain'),
    (' aberdeenshire', ' uk'),
    (' alabama', ' usa'),
    (' alaska', ' usa'),
    (' alberta', ' canada'),
    (' andalucia', ' spain'),
    (' arizona', ' usa'),
    (' asturias', ' spain'),
    (' badajoz', ' spain'),
    (' baden-wuerttemberg', ' germany'),
    (' baden-württemberg', ' germany'),
    (' bayern', ' germany'),
    (' berlin', ' germany'),
    (' bremen', ' germany'),
    (' cheshire', ' uk'),
    (' co. limerick', ' uk'),
    (' colorado', ' usa'),
    (' connecticut', ' usa'),
    (' dc', ' usa'),
    (' district of columbia', ' usa'),
    (' england', ' uk'),
    (' españa', ' spain'),
    (' estremadura', ' portugal'),
    (' eu', ' germany'),
    (' euskal herria', ' basque country'),
    (' far away...', ' usa'),
    (' florida', ' usa'),
    (' fort bend', ' usa'),
    (' framingham', ' usa'),
    (' galicia', ' spain'),
    (' germany"', ' germany'),
    (' gloucestershire', ' uk'),
    (' grenada', ' spain'),
    (' hamburg', ' germany'),
    (' haute-garonne', ' france'),
    (' hawaii', ' usa'),
    (' idaho', ' usa'),
    (' ile de france', ' france'),
    (' ilfov', ' romania'),
    (' illinois', ' usa'),
    (' indiana', ' usa'),
    (' iowa', ' usa'),
    (' ireland', ' uk'),
    (' italia', ' italy'),
    (' iwakuni', ' japan'),
    (' jalisco', ' mexico'),
    (' k1c7b1', ' canada'),
    (' kansas', ' usa'),
    (' kent', ' usa'),
    (' kentucky', ' usa'),
    (' l`italia', ' italy'),
    (' la france', ' france'),
    (' liege', ' belgium'),
    (' lisboa', ' portugal'),
    (' ljubljanska regija', ' slovenia'),
    (' lombardia', ' italy'),
    (' london', ' uk'),
    (' louisiana', ' usa'),
    (' maryland', ' usa'),
    (' massachusetts', ' usa'),
    (' metro manila', ' philippines'),
    (' michigan', ' usa'),
    (' midlothian', ' uk'),
    (' minnesota', ' usa'),
    (' mississippi', ' usa'),
    (' missouri', ' usa'),
    (' nebraska', ' usa'),
    (' nevada', ' usa'),
    (' new brunswick', ' canada'),
    (' new hampshire', ' usa'),
    (' new jersey', ' usa'),
    (' new mexico', ' usa'),
    (' new south wales', ' australia'),
    (' new york', ' usa'),
    (' new zealand"', ' new zealand'),
    (' noord-holland', ' netherlands'),
    (' nordrhein-westfalen', ' germany'),
    (' north carolina', ' usa'),
    (' north yorkshire', ' uk'),
    (' nova scotia', ' canada'),
    (' nsw', ' australia'),
    (' nz', ' new zealand'),
    (' oeiras', ' portugal'),
    (' ohio', ' usa'),
    (' okinawa', ' japan'),
    (' oklahoma', ' usa'),
    (' ontario', ' canada'),
    (' oregon', ' usa'),
    (' orense', ' spain'),
    (' overijssel', ' netherlands'),
    (' penang', ' malaysia'),
    (' pennsylvania', ' usa'),
    (' phillipines', ' philippines'),
    (' piemonte', ' italy'),
    (' pontevedra', ' spain'),
    (' porto', ' portugal'),
    (' quit', ' ecuador'),
    (' rhode island', ' usa'),
    (' rhône alpes', ' france'),
    (' rutherford', ' usa'),
    (' san josé', ' usa'),
    (' saskatchewan', ' canada'),
    (' scotland', ' uk'),
    (' seoul', ' south korea'),
    (' shelby', ' usa'),
    (' south australia', ' australia'),
    (' south carolina', ' usa'),
    (' south dakota', ' usa'),
    (' spain"', ' spain'),
    (' switzerland"', ' switzerland'),
    (' são vicente', ' cape verde'),
    (' tennessee', ' usa'),
    (' texas', ' usa'),
    (' tn', ' usa'),
    (' tx', ' usa'),
    (' u.k.', ' uk'),
    (' u.s.a.', ' usa'),
    (' u.s>', ' usa'),
    (' united kindgonm', ' uk'),
    (' united kingdom', ' uk'),
    (' united state', ' usa'),
    (' united states', ' usa'),
    (' universe', ' usa'),
    (' utah', ' usa'),
    (' victoria', ' australia'),
    (' virginia', ' usa'),
    (' vlaams-brabant', ' belgium'),
    (' wales', ' uk'),
    (' washington', ' usa'),
    (' west indies', ' cuba'),
    (' west yorkshire', ' uk'),
    (' western cape', ' south africa'),
    (' wicklow', ' uk'),
    (' wien', ' austria'),
    (' wiltshire', ' uk'),
    (' wisconsin', ' usa'),
    (' wyoming', ' usa'),
    (' ysa', ' usa'),
    (' zh', ' netherlands'),
    (' zuid holland', ' netherlands'),
    (' zuid-holland', ' netherlands'),

    (' alderney', ' uk'),
    # Clean-up of spelling variants, kept in case they occur in the data itself.
    (' france ', ' france'),
    (' usa ', ' usa'),
    (' van wert', ' usa'),
    ('england', ' uk'),
    ('usa ', ' usa'),

    (' jersey', ' usa'),
    (' n/a - on the road', ' usa'),

    (' british columbia', ' canada'),
    (' columbia', ' colombia'),
    (' guernsey', ' uk'),
    ('spain', ' spain'),

    # Values copied from 'city' (no leading space).
    ('paris', ' france'),
    ('prague', ' czech republic'),
    ('singapore', ' singapore'),
    ('tarlac', ' philippines'),
    ('ulsan', ' south korea'),
    ('zurich', ' switzerland'),

    ('bucuresti', ' romania'),
    ('goteborg', ' sweden'),
    ('nuku`alofa', ' tonga'),

    (' burma', ' myanmar'),
    (' basque country', ' spain'),
    (' canary islands', ' spain'),
    (' usa#', ' usa'),
)

for found_value, replacement in COUNTRY_FIXES:
    Sorted_rating.loc[Sorted_rating['country'] == found_value, 'country'] = replacement
# City names that ended up in 'country', grouped by the country they are replaced with.
# The names carry no leading space while every replacement does, so no replacement can
# match another entry and the groups can be applied in any order.
# NOTE(review): the US replacement is '  usa' with TWO leading spaces, which is a
# different value from the ' usa' used elsewhere -- it is reproduced unchanged here;
# confirm whether it is normalised further down.
CITY_COUNTRY_GROUPS = {
    '  usa': (
        'addison', 'alabaster', 'albuquerque', 'aloha', 'alpharetta', 'altamont',
        'alvin', 'anchorage', 'ann arbor', 'apex', 'apopka', 'appleton',
        'arlington', 'arnold', 'asheville', 'atlanta', 'auburn', 'audubon',
        'augusta', 'aurora', 'austin', 'baldwin', 'baltimore', 'baton rouge',
        'bay city', 'bay minette', 'bay village', 'beaverton', 'berkeley', 'berkley',
        'bethel', 'bettendorf', 'binghamton', 'boise', 'boston', 'bothell',
        'bourbonnais', 'bradenton', 'brattleboro', 'bremerton', 'broken arrow', 'bronx',
        'brooklyn', 'brownsville', 'buffalo', 'bullhead city', 'caldwell', 'camden',
        'canton', 'carmichael', 'carnegie', 'caruthersville', 'cary', 'cedar rapids',
        'centennial', 'chandler', 'chapel hill', 'charleston', 'charlotte', 'chattanooga',
        'chesapeake', 'chesterfield', 'chicago', 'chula vista', 'cincinnati',
        'cleveland heights', 'college park', 'colorado springs', 'columbia', 'columbiana',
        'columbus', 'corvallis', 'cotati', 'covington', 'cranston', 'crystal lake',
        'dallas', 'dalton', 'de soto', 'dearborn', 'deming', 'denver',
        'des moines', 'detroit', 'dixon', 'dyersburg', 'east meadow', 'el cajon',
        'elizabeth city', 'encinitas', 'eugene', 'eustis', 'evanston', 'evergreen',
        'ewing', 'exton', 'fair oaks', 'fairbanks', 'fairfax', 'fairfield',
        'flint', 'florissant', 'foster city', 'fountain valley', 'framingham', 'frederick',
        'freeport', 'fremont', 'fresno', 'gig harbor', 'glendale', 'glenside',
        'grayslake', 'green bay', 'greenbelt', 'greenville', 'harrisburg', 'harrison',
        'hartford', 'hayden', 'henderson', 'herndon', 'hillsboro', 'hollister',
        'holly springs', 'holtsville', 'homer', 'homestead', 'houma', 'houston',
    ),
    ' australia': ('adelaide', 'brisbane', 'canberra'),
    ' philippines': ('alburquerque',),
    ' egypt': ('alexandria',),
    ' italy': ('arco', 'cesena'),
    ' germany': ('augsburg', 'berlin', 'cologne', 'frankfurt', 'fürth', 'hamburg'),
    ' canada': (
        'aylmer', 'brantford', 'burlington', 'burnaby', 'calgary', 'cambridge',
        'chilliwack', 'courtenay', 'dauphin', 'didsbury', 'edmonton', 'fredericton',
        'halifax',
    ),
    ' spain': ('barcelona', 'ciempozuelos'),
    ' uk': ('birmingham', 'bristol', 'chester', 'dumfries', 'edinburgh'),
    ' isle of man': ('douglas',),
    ' ireland': ('dublin',),
    ' new zealand': ('dunedin',),
    ' france': ('gap',),
    ' guyana': ('georgetown',),
    ' bermuda': ('hamilton',),
}

for replacement_country, city_names in CITY_COUNTRY_GROUPS.items():
    Sorted_rating.loc[Sorted_rating['country'].isin(city_names), 'country'] = replacement_country
Sorted_rating.loc[Sorted_rating['country']=='humble','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='huntsville','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='iowa city','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='ipoh','country']=' malaysia'
Sorted_rating.loc[Sorted_rating['country']=='irvine','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='islamabad','country']=' pakistan'
Sorted_rating.loc[Sorted_rating['country']=='ithaca','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='jacksonville','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='jamestown','country']=' saint helena, ascension, and tristan da cunha'
Sorted_rating.loc[Sorted_rating['country']=='jasper','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='joplin','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='kaiserslautern','country']=' germany'
Sorted_rating.loc[Sorted_rating['country']=='kalamazoo','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='kamloops','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='kansas city','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='kelowna','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='kenmore','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='kennesaw','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='killeen','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='kincardine','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='king of prussia','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='kingston','country']=' jamaica'
Sorted_rating.loc[Sorted_rating['country']=='kirkwood','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='knoxville','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='kuala lumpur','country']=' malaysia'
Sorted_rating.loc[Sorted_rating['country']=='la quinta','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='lacey','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='lagos','country']=' nigeria'
Sorted_rating.loc[Sorted_rating['country']=='lagrange','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='lakeland','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='lakewood','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='lancaster','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='langley','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='las vegas','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='lawrence','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='league city','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='leduc','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='littleton','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='livermore','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='livonia','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='llanelli','country']=' uk'
Sorted_rating.loc[Sorted_rating['country']=='london','country']=' uk'
Sorted_rating.loc[Sorted_rating['country']=='long branch','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='longwood','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='los angeles','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='los gatos','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='louisville','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='lynchburg','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='madera','country']=' mexico'
Sorted_rating.loc[Sorted_rating['country']=='madison','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='mahwah','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='managua','country']=' nicaragua'
Sorted_rating.loc[Sorted_rating['country']=='manchester','country']=' uk'
Sorted_rating.loc[Sorted_rating['country']=='mannheim','country']=' germany'
Sorted_rating.loc[Sorted_rating['country']=='marion','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='martinez','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='marysville','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='melbourne','country']=' australia'
Sorted_rating.loc[Sorted_rating['country']=='memphis','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='mercer island','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='mesa','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='metairie','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='miami','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='middle island','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='milan','country']=' italy'
Sorted_rating.loc[Sorted_rating['country']=='milpitas','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='milton','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='milwaukee','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='milwaukie','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='mississauga','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='moberly','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='modesto','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='molalla','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='moline','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='monona','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='monroe','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='montpelier','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='morrisville','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='mountlake terrace','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='murphysboro','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='nancy','country']=' france'
Sorted_rating.loc[Sorted_rating['country']=='napa','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='naracoorte','country']=' australia'
Sorted_rating.loc[Sorted_rating['country']=='nashua','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='new bern','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='new brunswick','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='new castle','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='new freedom','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='new hope','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='new port richey','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='new windsor','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='new york','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='newark','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='newport beach','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='norfolk','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='north augusta','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='north fort myers','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='north haven','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='north miami','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='oak harbor','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='oak ridge','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='oakland','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='oakville','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='ocala','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='oceanport','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='odessa','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='ojai','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='olivehurst','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='olympia','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='omaha','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='orlando','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='orleans','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='oshkosh','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='ottawa','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='oxford','country']=' uk'
Sorted_rating.loc[Sorted_rating['country']=='pacifica','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='padova','country']=' italy'
Sorted_rating.loc[Sorted_rating['country']=='palatine','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='palo alto','country']=' mexico'
Sorted_rating.loc[Sorted_rating['country']=='parkersburg','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='parkville','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='pasadena','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='pelham','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='pensacola','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='peoria','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='perugia','country']=' italy'
Sorted_rating.loc[Sorted_rating['country']=='philadelphia','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='phoenix','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='piedmont','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='pisa','country']=' italy'
Sorted_rating.loc[Sorted_rating['country']=='pittsburg','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='plaistow','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='pleasant hill','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='port washington','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='portales','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='portland','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='powell','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='price','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='prospect','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='pueblo','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='racine','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='raleigh','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='reading','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='redlands','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='regina','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='reno','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='reserve','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='rhome','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='richardson','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='richland','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='richmond','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='richmond hill','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='rochester','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='rock hill','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='rockaway','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='rockford','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='rockland','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='rolla','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='roma','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='rome','country']=' italy'
Sorted_rating.loc[Sorted_rating['country']=='romeoville','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='round rock','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='rowlett','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='roy','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='sacramento','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='salem','country']=' india'
Sorted_rating.loc[Sorted_rating['country']=='salerno','country']=' italy'
Sorted_rating.loc[Sorted_rating['country']=='salt lake city','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='san antonio','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='san diego','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='san francisco','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='san luis obispo','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='san marcos','country']=' guatemala'
Sorted_rating.loc[Sorted_rating['country']=='san mateo','country']=' philippines'
Sorted_rating.loc[Sorted_rating['country']=='santa ana','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='santa cruz','country']=' bolivia'
Sorted_rating.loc[Sorted_rating['country']=='santa maria','country']=' philippines'
Sorted_rating.loc[Sorted_rating['country']=='santa monica','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='santa rosa','country']=' philippines'
Sorted_rating.loc[Sorted_rating['country']=='sarasota','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='saskatoon','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='seattle','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='sebastopol','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='simi valley','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='skippack','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='smyrna','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='somerset','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='somerville','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='sonora','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='south portland','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='spencer','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='spokane','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='springfield','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='st. catharines','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='st. charles','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='st. louis','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='st. paul','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='staten island','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='sterling','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='stockbridge','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='stockton','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='stockton-on-tees','country']=' uk'
Sorted_rating.loc[Sorted_rating['country']=='stuttgart','country']=' germany'
Sorted_rating.loc[Sorted_rating['country']=='sugar land','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='summerfield','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='superior','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='surrey','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='sutton','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='syracuse','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='tallahassee','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='tempe','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='temple','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='tewksbury','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='three rivers','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='tigard','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='tolland','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='topeka','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='toronto','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='torrington','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='traverse city','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='treviso','country']=' italy'
Sorted_rating.loc[Sorted_rating['country']=='troy','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='trumbull','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='tualatin','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='tucker','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='tucson','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='tulsa','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='tustin','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='twyford','country']=' uk'
Sorted_rating.loc[Sorted_rating['country']=='tynemouth','country']=' uk'
Sorted_rating.loc[Sorted_rating['country']=='urbana','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='utrecht','country']=' netherlands'
Sorted_rating.loc[Sorted_rating['country']=='valencia','country']=' venezuela'
Sorted_rating.loc[Sorted_rating['country']=='valley center','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='vancouver','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='vernon','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='victoria','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='vienna','country']=' austria'
Sorted_rating.loc[Sorted_rating['country']=='villejuif','country']=' france'
Sorted_rating.loc[Sorted_rating['country']=='virginia beach','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='walla walla','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='walled lake','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='waltham','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='washington','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='waterloo','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='west linn','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='west valley city','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='wichita','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='wilkes-barre','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='wilsonville','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='windsor','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='wismar','country']=' germany'
Sorted_rating.loc[Sorted_rating['country']=='woodbridge','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='woodstock','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='worcester','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='york','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='youngstown','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='yuma','country']='  usa'
Sorted_rating.loc[Sorted_rating['country']=='aldergrove','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='allston','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='anaheim hills','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='bad axe','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='beaverdell','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='beaverville','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='berwyn heights','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='blackwood','country']=' uk'
Sorted_rating.loc[Sorted_rating['country']=='bloomer','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='bloomfield po','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='bobcaygeon','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='bocabec','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='caddo','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='cascade','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='celista','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='charlotteown','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='chatelaillon-plage','country']=' france'
Sorted_rating.loc[Sorted_rating['country']=='cheriton','country']=' uk'
Sorted_rating.loc[Sorted_rating['country']=='citronelle','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='clemons','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='coconut grove','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='commerce township','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='conneautville','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='costa da caparica','country']=' portugal'
Sorted_rating.loc[Sorted_rating['country']=='creston bc','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='edisto island','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='elfin forest','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='elkins park','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='ferris','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='fife. wa','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='frankfurt am main','country']=' germany'
Sorted_rating.loc[Sorted_rating['country']=='freeburg','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='garden valley','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='grifton','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='johntown','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='kailua-kona','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='keaau','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='kendal','country']=' uk'
Sorted_rating.loc[Sorted_rating['country']=='kopperl','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='köln','country']=' germany'
Sorted_rating.loc[Sorted_rating['country']=='la jolla','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='laronge','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='leroy','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='little neck','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='llavaneres (barcelona)','country']=' spain'
Sorted_rating.loc[Sorted_rating['country']=='lochgilphead','country']=' uk'
Sorted_rating.loc[Sorted_rating['country']=='loro ciuffenna fraz. s.giustino v.no','country']=' italy'
Sorted_rating.loc[Sorted_rating['country']=='lynfield / auckland','country']=' new zealand'
Sorted_rating.loc[Sorted_rating['country']=='manotick','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='manton','country']=' uk'
Sorted_rating.loc[Sorted_rating['country']=='maple falls','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='metamora','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='middle village','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='milano','country']=' italy'
Sorted_rating.loc[Sorted_rating['country']=='millersburg','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='monte vista','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='montreal','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='muenchen','country']=' germany'
Sorted_rating.loc[Sorted_rating['country']=='münchen','country']=' germany'
Sorted_rating.loc[Sorted_rating['country']=='new york city','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='newalla','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='newport coast','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='north apollo','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='north hollywood','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='north wales','country']=' uk'
Sorted_rating.loc[Sorted_rating['country']=='oak leaf','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='patterson lakes','country']=' australia'
Sorted_rating.loc[Sorted_rating['country']=='pennsylvania','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='philadelphia area','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='port hadlock','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='portola valley','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='potomac falls','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='ray','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='red feather lakes','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='reynolds','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='s charleston','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='saanichton','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='saint louis','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='sainte-foy','country']=' france'
Sorted_rating.loc[Sorted_rating['country']=='section','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='seven mile beach','country']=' australia'
Sorted_rating.loc[Sorted_rating['country']=='shasta lake city','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='somewhere','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='south saint paul','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='studio city','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='thunderbay','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='toora','country']=' australia'
Sorted_rating.loc[Sorted_rating['country']=='torino','country']=' italy'
Sorted_rating.loc[Sorted_rating['country']=='vanderhoof','country']=' canada'
Sorted_rating.loc[Sorted_rating['country']=='vergennes','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='villa ridge','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='villeneuve d`ascq','country']=' france'
Sorted_rating.loc[Sorted_rating['country']=='westbank','country']=' israel'
Sorted_rating.loc[Sorted_rating['country']=='wien','country']=' austria'
Sorted_rating.loc[Sorted_rating['country']=='wrightstown','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='york haven','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']=='  usa','country']=' usa'
Sorted_rating.loc[Sorted_rating['country']==' saint helena, ascension, and tristan da cunha','country']=' usa'


In [None]:
import pycountry


def do_fuzzy_search(country):
    """Return the ``alpha_3`` code of the best fuzzy match for ``country``.

    The name is looked up with ``pycountry.countries.search_fuzzy`` and the
    first result is used. Any exception raised by that call propagates to the
    caller.
    """
    result = pycountry.countries.search_fuzzy(country)
    return result[0].alpha_3


# Distinct values of the cleaned 'country' column, in order of first appearance.
coun = list(Sorted_rating['country'].unique())

# Resolve every distinct value once; values that cannot be resolved get the
# placeholder code 'unknown' so that `iso_map` stays aligned with `coun`.
iso_map = []
for _name in coun:
    try:
        iso_map.append(do_fuzzy_search(_name))
    except Exception:
        # Best-effort lookup: a failed search (pycountry presumably raises
        # LookupError when nothing matches — confirm) or a non-string value
        # must not abort the notebook. `Exception` rather than a bare
        # `except:` so that KeyboardInterrupt / SystemExit still propagate.
        iso_map.append('unknown')

# Lookup table: index = country string as it appears in the data (index is
# named 'index'), single column 'code'.
df1 = pd.DataFrame({'code': iso_map}, index=pd.Index(coun, name='index'))

# Manual overrides applied after the fuzzy search. A boolean mask is used on
# purpose: it is a no-op when the name is absent, whereas label-based `.loc`
# assignment would append a new row.
# NOTE(review): ' cape verde ' carries a trailing blank — confirm that it
# matches the spelling in the data.
for _name, _code in ((' laos', 'LAO'), (' cape verde ', 'CPV')):
    df1.loc[df1.index == _name, 'code'] = _code

In [None]:
# Investigating country: number of distinct users per country.
# Grouping by (country, user) yields one row per pair, so counting rows per
# country afterwards gives distinct users rather than number of ratings.
new_df = Sorted_rating.groupby(['country', 'ID-User-key'], as_index=False).size()
country_count = new_df.groupby('country', as_index=False)['ID-User-key'].count()

# NOTE: despite its name, `top20` holds the 10 countries with the most users.
top20 = country_count.sort_values('ID-User-key', ascending=False).head(10)

# Activate the seaborn theme *before* the figure is created. It used to be
# called after the bars were drawn, so the figure looked different on a fresh
# kernel than on a second run of the cell.
sns.set()
viridis = sns.color_palette("viridis", 10)

fig, ax = plt.subplots(figsize=(6, 6))
sns.barplot(x="ID-User-key", y="country", data=top20, palette=viridis, ax=ax)
ax.set_xlabel("No of users")
ax.set_ylabel("")
# Write the user count next to each bar.
ax.bar_label(ax.containers[0])

plt.tight_layout()
plt.savefig('Figures/country_count.png')

In [None]:
import plotly.express as px

# Attach the country code from the `df1` lookup table to every rating row.
# `Series.map` keeps one output value per input row: a country missing from
# `df1` becomes NaN instead of being skipped, which previously could leave the
# code list shorter than the frame and break the column assignment. It also
# replaces the per-row Python loop and the positional `[0]` lookup on a
# label-indexed Series.
country_code = Sorted_rating['country'].map(df1['code'])
# Assigned positionally (as before), independent of the frame's index labels.
Sorted_rating['Country_Code'] = country_code.to_numpy()

# Mean rating per (code, country) pair.
grouped = Sorted_rating.groupby(['Country_Code', 'country'])
avg = grouped['Book-Rating'].mean().reset_index()
avg.columns = ['Country Code', 'Country', 'Average Rating']

# World map coloured by average rating; the country string is the hover title.
fig = px.choropleth(
    avg,
    locations=avg['Country Code'],
    color=avg['Average Rating'],
    hover_name=avg['Country'],
    color_continuous_scale=px.colors.sequential.Viridis,
    width=800,
    height=500,
)

fig.show()

In [None]:
# Investigation of users: number of ratings given by the 10 most active users.
ratings_count = Sorted_rating.groupby('User-ID', as_index=False)['Book-Rating'].count()
top10 = ratings_count.sort_values('Book-Rating', ascending=False).head(10)

sizes = np.array(top10['Book-Rating'])
labels = np.array(top10['User-ID'])
label = ['UserID: ' + str(user_id) for user_id in labels]
total = np.sum(sizes)

# `plt.get_cmap` replaces `matplotlib.cm.get_cmap`, which is deprecated and no
# longer available in recent Matplotlib releases. Only the lower 80 % of the
# colormap is sampled, one colour per wedge.
viridis = plt.get_cmap('viridis', 20)
colors = viridis(np.linspace(0, 0.8, 10))

# Draw on a dedicated figure so the pie never lands on a previously active one.
fig_users, ax_users = plt.subplots()
_, _, autotexts = ax_users.pie(
    sizes,
    labels=label,
    colors=colors,
    wedgeprops={'linewidth': 1, 'edgecolor': 'white'},
    # autopct receives the wedge's percentage; convert it back to the
    # absolute number of ratings for display.
    autopct=lambda p: '{:.0f}'.format(p * total / 100),
    startangle=90,
)

# White numbers stay readable on the dark wedge colours.
for ins in autotexts:
    ins.set_color('white')

plt.tight_layout()
plt.savefig('Figures/Pie_users.png')


In [None]:
# Saving the preprocessed dataset: keep only the columns listed below (in this
# order) and write them, without the row index, to 'Preprocessed_Data.zip'.
_output_columns = [
    'ID-Book-key',
    'ID-User-key',
    'Book-Title',
    'Book-Author',
    'Year-Of-Publication',
    'Publisher',
    'Age',
    'country',
    'Country_Code',
    'Book-Rating',
]
Finished_Data = Sorted_rating[_output_columns]
Finished_Data.to_csv('Preprocessed_Data.zip', index=False)