In [184]:
import pandas as pd

In [185]:
# Load the three input tables in one pass; the order of the file names
# matches the order of the names being bound on the left-hand side.
df_cities, df_monuments, df_monuments_description = (
    pd.read_csv(csv_path)
    for csv_path in ('cities.csv', 'monuments_data.csv', 'monuments_description.csv')
)

In [186]:
# Attach the constant/placeholder columns in a single step:
# every row is typed 'Monument', starts with an empty description,
# is not sponsored, and has no rating or schedule yet.
df_monuments = df_monuments.assign(
    type='Monument',
    description='',
    rating=None,
    sponsored=False,
    schedule=None,
)

In [187]:
# Discard the 'identifier' and 'history' columns; rebinding the name
# (instead of mutating in place) keeps the statement a plain expression.
df_monuments = df_monuments.drop(columns=['identifier', 'history'])

In [188]:
# number of monument rows per city, most frequent first
df_monuments.loc[:, 'city'].value_counts()

city
Meknes         79
Rabat          72
Marrakesh      54
Fez            52
Salé           29
Essaouira      26
El Jadida      24
Tangier        13
Safi           12
Ouarzazate     10
Casablanca      9
Oujda           9
Agadir          6
Mehdya          5
Chefchaouen     5
Azilal          5
Guelmim         4
Figuig          4
Errachidia      4
El Hajeb        4
Taroudant       4
Zagora          4
Asilah          3
Nador           3
Sidi Kacem      3
Sidi Ifni       3
Name: count, dtype: int64

In [189]:
# Rewrite the 'Marrakesh' spelling to 'Marrakech'; only exact matches change.
df_monuments['city'] = df_monuments['city'].replace({'Marrakesh': 'Marrakech'})

In [190]:
# distinct city names listed in cities.csv, in order of first appearance
cities = pd.unique(df_cities['city'])
cities

array(['Rabat', 'Casablanca', 'Tangier', 'Ifrane', 'Fez', 'Dakhla',
       'Marrakech', 'Meknes', 'Safi', 'Chefchaouen'], dtype=object)

In [191]:
# keep only monuments whose city appears in `cities`
is_known_city = df_monuments['city'].isin(cities)
df_monuments_filtered = df_monuments.loc[is_known_city]
df_monuments_filtered['city'].value_counts()

city
Meknes         79
Rabat          72
Marrakech      54
Fez            52
Tangier        13
Safi           12
Casablanca      9
Chefchaouen     5
Name: count, dtype: int64

In [192]:
# add Ifrane
# Append one placeholder row for Ifrane (empty name/image, fixed coordinates).
# Uses the public pd.concat API rather than the private DataFrame._append,
# and is guarded so that re-running the cell does not stack duplicate rows.
if not (df_monuments_filtered['city'] == 'Ifrane').any():
    ifrane_placeholder = pd.DataFrame([{
        'city': 'Ifrane',
        'image': '',
        'name': '',
        'location': 'Ifrane, Morocco',
        'coordinates': '33.5245, -5.1102',
        'type': 'Monument',
        'description': '',
        'rating': None,
        'sponsored': False,
        'schedule': None,
    }])
    # ignore_index=True renumbers the rows 0..n-1, as the original append did
    df_monuments_filtered = pd.concat(
        [df_monuments_filtered, ifrane_placeholder],
        ignore_index=True,
    )


In [193]:
# per-city count of monuments that have no coordinates
df_monuments_filtered.loc[df_monuments_filtered['coordinates'].isnull(), 'city'].value_counts()

city
Meknes         15
Fez            12
Rabat          10
Marrakech       6
Safi            2
Tangier         2
Chefchaouen     1
Name: count, dtype: int64

In [194]:
# keep only the monuments that have coordinates
has_coordinates = df_monuments_filtered['coordinates'].notna()
df_monuments_filtered_clean = df_monuments_filtered[has_coordinates]
df_monuments_filtered_clean['city'].value_counts()

city
Meknes         64
Rabat          62
Marrakech      48
Fez            40
Tangier        11
Safi           10
Casablanca      9
Chefchaouen     4
Ifrane          1
Name: count, dtype: int64

In [195]:
# keep at most 10 monuments per city (the first 10 of each city in row order)
monuments_by_city = df_monuments_filtered_clean.groupby('city')
df_monuments_filtered_clean = monuments_by_city.head(10)
df_monuments_filtered_clean['city'].value_counts()

city
Fez            10
Marrakech      10
Meknes         10
Rabat          10
Safi           10
Tangier        10
Casablanca      9
Chefchaouen     4
Ifrane          1
Name: count, dtype: int64

In [196]:
# display the capped, coordinate-complete monuments table for inspection
df_monuments_filtered_clean

Unnamed: 0,city,image,name,location,coordinates,type,description,rating,sponsored,schedule
0,Casablanca,https://upload.wikimedia.org/wikipedia/commons...,Casablanca Cathedral,Casablanca,"33°35'28""N, 7°37'28""W",Monument,,,False,
1,Casablanca,https://upload.wikimedia.org/wikipedia/commons...,El Hank Lighthouse,Casablanca,"33°36'35.71""N, 7°39'16.81""W",Monument,,,False,
2,Casablanca,,Notre-Dame-de-Lourdes Church,Casablanca,"33°34'57.5""N, 7°36'56.8""W",Monument,,,False,
3,Casablanca,https://upload.wikimedia.org/wikipedia/commons...,Arab League Park of Casablanca,Casablanca,"33°35'15.846""N, 7°37'28.668""W",Monument,,,False,
4,Casablanca,https://upload.wikimedia.org/wikipedia/commons...,Isesco Park,Casablanca,"33°34'45.988""N, 7°36'47.722""W",Monument,,,False,
...,...,...,...,...,...,...,...,...,...,...
289,Tangier,,Borj Es-Salam,Tangier,"35°47'13.664""N, 5°48'34.186""W",Monument,,,False,
290,Tangier,,Abraham Toledano Synagogue,Tangier,"35°47'6.428""N, 5°48'42.574""W",Monument,,,False,
291,Tangier,https://upload.wikimedia.org/wikipedia/commons...,Church of the Immaculate Conception,Tangier,"35°47'7""N, 5°48'41""W",Monument,,,False,
294,Tangier,https://upload.wikimedia.org/wikipedia/commons...,Bab El Bhar (Tangier),Tangier,"35°47'20.429""N, 5°48'43.610""W",Monument,,,False,


In [197]:
# Attach the descriptions by monument name (left join keeps every monument).
# The merge yields 'description_x' (the placeholder column) and
# 'description_y' (the looked-up text); overwrite the former with the latter,
# drop the latter, and restore the plain 'description' name so the column
# stays in its original position.
df_monuments_filtered_clean = (
    df_monuments_filtered_clean
    .merge(df_monuments_description, on='name', how='left')
    .assign(description_x=lambda merged: merged['description_y'])
    .drop(columns='description_y')
    .rename(columns={'description_x': 'description'})
)
df_monuments_filtered_clean

Unnamed: 0,city,image,name,location,coordinates,type,description,rating,sponsored,schedule
0,Casablanca,https://upload.wikimedia.org/wikipedia/commons...,Casablanca Cathedral,Casablanca,"33°35'28""N, 7°37'28""W",Monument,"Also known as the Sacré-Cœur Cathedral, this s...",,False,
1,Casablanca,https://upload.wikimedia.org/wikipedia/commons...,El Hank Lighthouse,Casablanca,"33°36'35.71""N, 7°39'16.81""W",Monument,"Built in 1916, this lighthouse stands at 51 me...",,False,
2,Casablanca,,Notre-Dame-de-Lourdes Church,Casablanca,"33°34'57.5""N, 7°36'56.8""W",Monument,Notable for its modernist architecture and imp...,,False,
3,Casablanca,https://upload.wikimedia.org/wikipedia/commons...,Arab League Park of Casablanca,Casablanca,"33°35'15.846""N, 7°37'28.668""W",Monument,"A large urban park established in 1918, featur...",,False,
4,Casablanca,https://upload.wikimedia.org/wikipedia/commons...,Isesco Park,Casablanca,"33°34'45.988""N, 7°36'47.722""W",Monument,"Named after the Islamic Educational, Scientifi...",,False,
...,...,...,...,...,...,...,...,...,...,...
69,Tangier,,Borj Es-Salam,Tangier,"35°47'13.664""N, 5°48'34.186""W",Monument,"A historic fortification in Tangier, notable f...",,False,
70,Tangier,,Abraham Toledano Synagogue,Tangier,"35°47'6.428""N, 5°48'42.574""W",Monument,"A historic synagogue in Tangier, reflecting th...",,False,
71,Tangier,https://upload.wikimedia.org/wikipedia/commons...,Church of the Immaculate Conception,Tangier,"35°47'7""N, 5°48'41""W",Monument,"A Catholic church in Tangier, known for its ar...",,False,
72,Tangier,https://upload.wikimedia.org/wikipedia/commons...,Bab El Bhar (Tangier),Tangier,"35°47'20.429""N, 5°48'43.610""W",Monument,"A historic gate in Tangier, part of the city's...",,False,


In [198]:
# add random rating between 3 and 5, 2 decimal places
import random

# Use a dedicated, seeded generator so a fresh run of the notebook produces
# the same ratings every time; the unseeded global generator made the
# exported ratings change on every execution.
rating_rng = random.Random(42)

# One draw per row, built directly as a float column aligned to the frame's
# index (no need to .apply over the placeholder None values and cast after).
df_monuments_filtered_clean['rating'] = pd.Series(
    [round(rating_rng.uniform(3, 5), 2) for _ in range(len(df_monuments_filtered_clean))],
    index=df_monuments_filtered_clean.index,
    dtype=float,
)
df_monuments_filtered_clean

Unnamed: 0,city,image,name,location,coordinates,type,description,rating,sponsored,schedule
0,Casablanca,https://upload.wikimedia.org/wikipedia/commons...,Casablanca Cathedral,Casablanca,"33°35'28""N, 7°37'28""W",Monument,"Also known as the Sacré-Cœur Cathedral, this s...",3.98,False,
1,Casablanca,https://upload.wikimedia.org/wikipedia/commons...,El Hank Lighthouse,Casablanca,"33°36'35.71""N, 7°39'16.81""W",Monument,"Built in 1916, this lighthouse stands at 51 me...",3.32,False,
2,Casablanca,,Notre-Dame-de-Lourdes Church,Casablanca,"33°34'57.5""N, 7°36'56.8""W",Monument,Notable for its modernist architecture and imp...,4.52,False,
3,Casablanca,https://upload.wikimedia.org/wikipedia/commons...,Arab League Park of Casablanca,Casablanca,"33°35'15.846""N, 7°37'28.668""W",Monument,"A large urban park established in 1918, featur...",4.93,False,
4,Casablanca,https://upload.wikimedia.org/wikipedia/commons...,Isesco Park,Casablanca,"33°34'45.988""N, 7°36'47.722""W",Monument,"Named after the Islamic Educational, Scientifi...",4.87,False,
...,...,...,...,...,...,...,...,...,...,...
69,Tangier,,Borj Es-Salam,Tangier,"35°47'13.664""N, 5°48'34.186""W",Monument,"A historic fortification in Tangier, notable f...",3.79,False,
70,Tangier,,Abraham Toledano Synagogue,Tangier,"35°47'6.428""N, 5°48'42.574""W",Monument,"A historic synagogue in Tangier, reflecting th...",4.19,False,
71,Tangier,https://upload.wikimedia.org/wikipedia/commons...,Church of the Immaculate Conception,Tangier,"35°47'7""N, 5°48'41""W",Monument,"A Catholic church in Tangier, known for its ar...",3.00,False,
72,Tangier,https://upload.wikimedia.org/wikipedia/commons...,Bab El Bhar (Tangier),Tangier,"35°47'20.429""N, 5°48'43.610""W",Monument,"A historic gate in Tangier, part of the city's...",4.87,False,


In [199]:
# write the final monuments table to monuments.csv, omitting the row index
df_monuments_filtered_clean.to_csv('monuments.csv', index=False)