## Combine All Museums & Normalize

In [451]:
import pandas as pd 
import numpy as np
import seaborn as sn
import matplotlib.pyplot as plt
import seaborn as sns
import scipy as sp
import os
import json

In [452]:
# Read the four museum CSV files; the file names are listed in the same order
# as the target variables on the left-hand side.
df_british, df_global, df_met, df_rosi = (
    pd.read_csv(csv_name)
    for csv_name in (
        'British_result.csv',
        'combined_Global.csv',
        'combined_Met.csv',
        'combined_Rosi.csv',
    )
)

## Which columns to keep?

`The common columns to keep and normalize ✨🌿`
1. object_classification
2. material
3. path
4. date
5. location
6. description
7. period
8. name

In [453]:
def adjust_image_path(cellvalue, cat_museum):
    """Build a museum-prefixed ``.jpg`` file name from a stored image path.

    Parameters
    ----------
    cellvalue : str
        Image path as stored in the CSV. Directories may be separated by
        backslashes or forward slashes.
    cat_museum : str
        Short museum tag placed in front of the file stem (e.g. ``"bri"``).

    Returns
    -------
    str
        ``"<cat_museum>_<stem>.jpg"``, where ``<stem>`` is the last path
        component without its final extension.
    """
    # Accept both Windows and POSIX separators when isolating the file name.
    file_name = cellvalue.replace('\\', '/').rsplit('/', 1)[-1]
    # Strip only the final extension: dots inside the stem are kept, and a name
    # with no extension is used as-is instead of raising IndexError.
    stem, dot, _extension = file_name.rpartition('.')
    if not dot:
        stem = file_name
    return f"{cat_museum}_{stem}.jpg"

### British Museum

In [454]:
# List the column labels of the British Museum table as loaded from the CSV.
df_british.columns

Index(['Unnamed: 0', 'artifact_name', 'Description_ex', 'Materials_ex',
       'Findspot_ex', 'Dimensions_1', 'Dimensions_2', 'Location_ex',
       'Acquisition_name_ex', 'Acquisition_date_ex', 'Department_ex',
       'Cultures/periods_ex', 'image_path'],
      dtype='object')

In [455]:
# Remove the saved index column and the British Museum fields that are not
# carried forward into the combined table.
df_british = df_british.drop(
    [
        "Unnamed: 0",
        "Findspot_ex",
        "Dimensions_1",
        "Dimensions_2",
        "Location_ex",
        "Acquisition_name_ex",
        "Acquisition_date_ex",
        "Department_ex",
    ],
    axis="columns",
)

In [456]:
# Rewrite every stored image path as a "bri_"-prefixed .jpg file name, then
# preview the first five rows.
df_british['image_path'] = df_british['image_path'].apply(adjust_image_path, cat_museum="bri")
df_british.head()

Unnamed: 0,artifact_name,Description_ex,Materials_ex,Cultures/periods_ex,image_path
0,ear-ring; bead,Copper alloy ear-ring of thin wire with a loo...,copper alloy,,bri_A_1880-3687-m.jpg
1,hair-pin; dress-pin,Copper alloy ornamental hair- or dress-pin he...,copper alloy,,bri_A_1880-3711-o.jpg
2,hair-pin; dress-pin,Copper alloy ornamental hair- or dress-pin he...,copper alloy,,bri_A_1880-3714-a.jpg
3,hair-pin; dress-pin,Copper alloy ornamental hair- or dress-pin he...,copper alloy,,bri_A_1880-3717-f.jpg
4,ear-ring,"Cast, copper alloy hoop ear-ring bound with t...",copper alloy,,bri_A_1880-3960.jpg


In [457]:
# Map the British Museum column labels onto the shared column names.
df_british = df_british.rename(
    columns={
        'artifact_name': 'object_classification',
        'Description_ex': 'description',
        'Materials_ex': 'material',
        'Cultures/periods_ex': 'period',
        'image_path': 'path',
    }
)

In [458]:
# Add the shared columns this frame does not have yet, filled with None.
df_british = df_british.assign(date=None, location=None, name=None)

In [459]:
# Display the British Museum frame after the renames and placeholder columns.
df_british

Unnamed: 0,object_classification,description,material,period,path,date,location,name
0,ear-ring; bead,Copper alloy ear-ring of thin wire with a loo...,copper alloy,,bri_A_1880-3687-m.jpg,,,
1,hair-pin; dress-pin,Copper alloy ornamental hair- or dress-pin he...,copper alloy,,bri_A_1880-3711-o.jpg,,,
2,hair-pin; dress-pin,Copper alloy ornamental hair- or dress-pin he...,copper alloy,,bri_A_1880-3714-a.jpg,,,
3,hair-pin; dress-pin,Copper alloy ornamental hair- or dress-pin he...,copper alloy,,bri_A_1880-3717-f.jpg,,,
4,ear-ring,"Cast, copper alloy hoop ear-ring bound with t...",copper alloy,,bri_A_1880-3960.jpg,,,
...,...,...,...,...,...,...,...,...
56830,papyrus,Papyrus Harris; column 75; Discourse to Manki...,papyrus,20th Dynasty,bri_Y_EA9999-62.jpg,,,
56831,papyrus,Papyrus Harris; column 76; Discourse to Manki...,papyrus,20th Dynasty,bri_Y_EA9999-75.jpg,,,
56832,papyrus,Papyrus Harris; column 77; Discourse to Manki...,papyrus,20th Dynasty,bri_Y_EA9999-76.jpg,,,
56833,papyrus,Papyrus Harris; column 78; Discourse to Manki...,papyrus,20th Dynasty,bri_Y_EA9999-77.jpg,,,


### Global Museum

In [460]:
# List the column labels of the Global Museum table as loaded from the CSV.
df_global.columns

Index(['Unnamed: 0', 'Archaeological Site', 'Category', 'Material', 'Dating',
       'Description', 'Path', 'Name'],
      dtype='object')

In [461]:
# Remove the index column that was written into the CSV.
df_global = df_global.drop('Unnamed: 0', axis="columns")

In [462]:
# Rewrite every stored image path as a "glo_"-prefixed .jpg file name, then
# preview the first five rows.
df_global['Path'] = df_global['Path'].apply(adjust_image_path, cat_museum="glo")
df_global.head()

Unnamed: 0,Archaeological Site,Category,Material,Dating,Description,Path,Name
0,el-faiyum,relief,limestone,roman period,several late period texts mention a lion god n...,glo_1.jpg,tithoes
1,unknown,coptic textile,wool,byzantine period,"this piece of fabric is a so-called ""taquetã©'...",glo_10.jpg,fragment of wedge shape
2,governorate of faiyum,basket,rush,graeco-roman period,this basket comes from the excavations of b. g...,glo_100.jpg,basket with lid
3,abydos,palette,greywacke,early dynastic period,this fragment of a rectangular palette diagram...,glo_1000.jpg,fragment of rectangular palette
4,thebes: west bank,finger ring,bronze,unknown,,glo_10000.jpg,


In [463]:
# Map the Global Museum column labels onto the shared column names.
df_global = df_global.rename(
    columns={
        'Archaeological Site': 'location',
        'Category': 'object_classification',
        'Material': 'material',
        'Dating': 'period',
        'Description': 'description',
        'Name': 'name',
        'Path': 'path',
    }
)

In [464]:
# Add the shared 'date' column, filled with None, and display the result.
df_global = df_global.assign(date=None)
df_global

Unnamed: 0,location,object_classification,material,period,description,path,name,date
0,el-faiyum,relief,limestone,roman period,several late period texts mention a lion god n...,glo_1.jpg,tithoes,
1,unknown,coptic textile,wool,byzantine period,"this piece of fabric is a so-called ""taquetã©'...",glo_10.jpg,fragment of wedge shape,
2,governorate of faiyum,basket,rush,graeco-roman period,this basket comes from the excavations of b. g...,glo_100.jpg,basket with lid,
3,abydos,palette,greywacke,early dynastic period,this fragment of a rectangular palette diagram...,glo_1000.jpg,fragment of rectangular palette,
4,thebes: west bank,finger ring,bronze,unknown,,glo_10000.jpg,,
...,...,...,...,...,...,...,...,...
15904,unknown,amulet,faience,unknown,,glo_9995.jpg,,
15905,thebes: west bank,necklace,shell,unknown,,glo_9996.jpg,,
15906,unknown,necklace,faience,unknown,,glo_9997.jpg,,
15907,unknown,necklace,faience,unknown,,glo_9998.jpg,,


### Met Museum

In [465]:
# List the column labels of the Met table as loaded from the CSV.
df_met.columns

Index(['Unnamed: 0', 'title', 'city', 'Path', 'locus', 'subregion', 'reign',
       'period', 'region', 'dynasty', 'objectName', 'medium', 'classification',
       'objectDate'],
      dtype='object')

In [466]:
# Remove the saved index column and the 'locus' column.
df_met = df_met.drop(['Unnamed: 0', "locus"], axis="columns")

In [467]:
# Rewrite every stored image path as a "met_"-prefixed .jpg file name, then
# preview the first five rows.
df_met['Path'] = df_met['Path'].apply(adjust_image_path, cat_museum="met")
df_met.head()

Unnamed: 0,title,city,Path,subregion,reign,period,region,dynasty,objectName,medium,classification,objectDate
0,Bastet holding aegis and basket,unknown,met_329767.jpg,unknown,unknown,Late Period -Ptolemaic Period,unknown,Dynasty 26 or later,"Bastet, standing, aegis, basket",Cupreous metal,unknown,664-30 B.C.
1,Upper half of a woman from a pair or group statue,unknown,met_329768.jpg,unknown,unknown,Late Middle Kingdom,unknown,Late Dynasty 12-13,Upper half of a woman from a pair or group statue,Granite,unknown,ca. 1850-1650 B.C.
2,Head from a figure of Osiris,unknown,met_329769.jpg,unknown,unknown,Late Period,unknown,Dynasty 26,Head from a figure of Osiris,Graywacke,unknown,ca. 664-525 B.C.
3,Figure of Osiris,unknown,met_329771.jpg,unknown,unknown,Late Period-Ptolemaic Period,unknown,unknown,"figure, Osiris",Cupreous metal,unknown,664-30 B.C.
4,Head and chest of a shabti figure,unknown,met_329774.jpg,unknown,unknown,Middle Kingdom,unknown,late Dynasty 12 to Dynasty 13,Head and chest of a shabti figure,Steatite or serpentinite,unknown,ca. 1850-1650 B.C.


In [468]:
def _known_values(row, columns):
    """Return ``row[col]`` for each of ``columns``, skipping NaN and "unknown"."""
    return [row[col] for col in columns if pd.notna(row[col]) and row[col] != "unknown"]


# Split objectName on commas and strip the whitespace around each piece, so
# "figure, Osiris" becomes ['figure', 'Osiris'] rather than ['figure', ' Osiris'].
# Rows where the split did not yield a list (e.g. a missing name) are left as-is.
df_met['object_classification'] = df_met['objectName'].str.split(',').apply(
    lambda parts: [part.strip() for part in parts] if isinstance(parts, list) else parts
)
df_met = df_met.drop(columns=["objectName"])

# Fold the three place columns into one list-valued 'location' column.
df_met['location'] = df_met.apply(_known_values, axis=1, args=(['city', 'subregion', 'region'],))
df_met = df_met.drop(columns=['city', 'subregion', 'region'])

# Fold reign / period / dynasty into 'period'. The right-hand side is evaluated in
# full before the assignment, so it still reads the original scalar 'period' values.
df_met['period'] = df_met.apply(_known_values, axis=1, args=(['reign', 'period', 'dynasty'],))
df_met = df_met.drop(columns=['reign', 'dynasty'])

# Fold medium and classification into 'material'.
# NOTE(review): 'classification' values look like object categories rather than
# materials — confirm that they belong in this column.
df_met['material'] = df_met.apply(_known_values, axis=1, args=(['medium', 'classification'],))
df_met = df_met.drop(columns=['medium', 'classification'])

# Add the shared 'name' column, filled with None.
df_met["name"] = None

In [469]:
# Map the remaining Met column labels onto the shared column names.
df_met = df_met.rename(
    columns={
        'title': 'description',
        'objectDate': 'date',
        'Path': 'path',
    }
)

In [470]:
# Display the Met frame after merging, dropping and renaming its columns.
df_met

Unnamed: 0,description,path,period,date,object_classification,location,material,name
0,Bastet holding aegis and basket,met_329767.jpg,"[Late Period -Ptolemaic Period, Dynasty 26 or ...",664-30 B.C.,"[Bastet, standing, aegis, basket]",[],[Cupreous metal],
1,Upper half of a woman from a pair or group statue,met_329768.jpg,"[Late Middle Kingdom, Late Dynasty 12-13]",ca. 1850-1650 B.C.,[Upper half of a woman from a pair or group st...,[],[Granite],
2,Head from a figure of Osiris,met_329769.jpg,"[Late Period, Dynasty 26]",ca. 664-525 B.C.,[Head from a figure of Osiris],[],[Graywacke],
3,Figure of Osiris,met_329771.jpg,[Late Period-Ptolemaic Period],664-30 B.C.,"[figure, Osiris]",[],[Cupreous metal],
4,Head and chest of a shabti figure,met_329774.jpg,"[Middle Kingdom, late Dynasty 12 to Dynasty 13]",ca. 1850-1650 B.C.,[Head and chest of a shabti figure],[],[Steatite or serpentinite],
...,...,...,...,...,...,...,...,...
14891,Dress,met_85658.jpg,[],19th century,[Dress],[],"[Cotton, metal wrapped thread, Main dress-Wome...",
14892,Dress,met_85659.jpg,[],19th century,[Dress],[],"[Cotton, metal wrapped thread, Main dress-Wome...",
14893,Figure of a rearing cobra with feline head,met_857670.jpg,[Late Period - Ptolemaic Period],664-30 B.C.,[Figure of a rearing cobra with feline head],[],[Bronze or cupreous alloy],
14894,Gold foil fragments,met_905798.jpg,"[New Kingdom, Dynasty 18]",ca. 1550-1295 B.C.,[gold foil fragments],"[Dra Abu el-Naga, Upper Egypt, Thebes]",[Gold leaf],


### Rosi Museum

In [471]:
# List the column labels of the Rosi table as loaded from the CSV.
df_rosi.columns

Index(['Unnamed: 0', 'Title', 'Culture', 'Dynasty', 'Date', 'Period',
       'Material', 'object_Type', 'Path'],
      dtype='object')

In [472]:
# Remove the saved index column and the 'Culture' column.
df_rosi = df_rosi.drop(['Unnamed: 0', 'Culture'], axis="columns")

In [473]:
# Rewrite every stored image path as a "ros_"-prefixed .jpg file name, then
# preview the first five rows.
df_rosi['Path'] = df_rosi['Path'].apply(adjust_image_path, cat_museum="ros")
df_rosi.head()

Unnamed: 0,Title,Dynasty,Date,Period,Material,object_Type,Path
0,RC-1 - Sekhmet Statue,,332-30 BCE,Ptolemaic,Bronze,Sculpture,ros_789.jpg
1,RC-2 - Harpocrates,,305 - 30 BCE,Ptolemaic,Bronze,Sculpture,ros_931.jpg
2,RC-3 - Osiris,,332 - 31 BCE,Ptolemaic,Bronze,Sculpture,ros_365.jpg
3,RC-4 - Khonsu,,332 - 30 BCE,Ptolemaic,Bronze,Sculpture,ros_774.jpg
4,RC-5 - Apis Bull,,525 - 343 BCE,Late Period,Bronze,Sculpture,ros_913.jpg


In [474]:
# Map the Rosi column labels onto the shared column names.
df_rosi = df_rosi.rename(
    columns={
        'Title': 'name',
        'Date': 'date',
        'Material': 'material',
        'object_Type': 'object_classification',
        'Path': 'path',
    }
)

In [475]:
# Add the shared columns this frame does not have yet, filled with None.
df_rosi = df_rosi.assign(description=None, location=None)

In [476]:
# Merge 'Dynasty' and 'Period' into one list-valued 'period' column, keeping
# only the values that are not missing.
df_rosi['period'] = df_rosi.apply(
    lambda record: [record[col] for col in ('Dynasty', 'Period') if pd.notna(record[col])],
    axis=1,
)

In [477]:
# The two source columns have been merged into 'period', so remove them.
df_rosi = df_rosi.drop(["Dynasty", "Period"], axis="columns")

In [478]:
# Display the Rosi frame after renaming, merging and dropping its columns.
df_rosi

Unnamed: 0,name,date,material,object_classification,path,description,location,period
0,RC-1 - Sekhmet Statue,332-30 BCE,Bronze,Sculpture,ros_789.jpg,,,[Ptolemaic]
1,RC-2 - Harpocrates,305 - 30 BCE,Bronze,Sculpture,ros_931.jpg,,,[Ptolemaic]
2,RC-3 - Osiris,332 - 31 BCE,Bronze,Sculpture,ros_365.jpg,,,[Ptolemaic]
3,RC-4 - Khonsu,332 - 30 BCE,Bronze,Sculpture,ros_774.jpg,,,[Ptolemaic]
4,RC-5 - Apis Bull,525 - 343 BCE,Bronze,Sculpture,ros_913.jpg,,,[Late Period]
...,...,...,...,...,...,...,...,...
819,RC-297 - Ptah,,Bronze,Sculpture,ros_221.jpg,,,[]
820,RC-251 - Rameses Ii Cartouche Fragment,,Granite,Epigraphy,ros_64.jpg,,,[]
821,RC-1727 - False Door with Ibis,,Limestone,Funerary,ros_536.jpg,,,[]
822,RC-5187 - Scarab,1784-1668 BCE,Faience,Amulets,ros_863.jpg,,,"[13TH, 2nd Intermediate]"


## Combine

In [479]:
# Collect every column name that is missing from at least one of the four frames.
# A chained symmetric_difference is not used here: it keeps only the names that
# occur in an odd number of frames, so a column shared by exactly two frames
# would go unreported.
column_sets = [set(frame.columns) for frame in (df_met, df_british, df_global, df_rosi)]
columns_difference = set.union(*column_sets) - set.intersection(*column_sets)

# Display the columns that are different between the DataFrames
print("Columns that are different between the DataFrames:", columns_difference)

Columns that are different between the DataFrames: set()
