In [1]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt 
import matplotlib.image as mpimg

import warnings
warnings.filterwarnings("ignore")

import seaborn as sns

import matplotlib.style as style

%matplotlib inline

In [3]:
# Read MET.csv (path is relative to the notebook's working directory) into `met`.
met = pd.read_csv(filepath_or_buffer='MET.csv')

In [4]:
# Preview the first five rows of the freshly loaded table.
met.head()

Unnamed: 0,Object Number,Is Highlight,Is Public Domain,Is Timeline Work,Object ID,Department,AccessionYear,Object Name,Title,Culture,...,Excavation,River,Classification,Rights and Reproduction,Link Resource,Object Wikidata URL,Metadata Date,Repository,Tags,Tags AAT URL
0,1979.486.1,False,False,False,1,The American Wing,1979,Coin,One-dollar Liberty Head Coin,,...,,,Metal,,http://www.metmuseum.org/art/collection/search/1,,,"Metropolitan Museum of Art, New York, NY",,
1,1980.264.5,False,False,False,2,The American Wing,1980,Coin,Ten-dollar Liberty Head Coin,,...,,,Metal,,http://www.metmuseum.org/art/collection/search/2,,,"Metropolitan Museum of Art, New York, NY",,
2,67.265.9,False,False,False,3,The American Wing,1967,Coin,Two-and-a-Half Dollar Coin,,...,,,Metal,,http://www.metmuseum.org/art/collection/search/3,,,"Metropolitan Museum of Art, New York, NY",,
3,67.265.10,False,False,False,4,The American Wing,1967,Coin,Two-and-a-Half Dollar Coin,,...,,,Metal,,http://www.metmuseum.org/art/collection/search/4,,,"Metropolitan Museum of Art, New York, NY",,
4,67.265.11,False,False,False,5,The American Wing,1967,Coin,Two-and-a-Half Dollar Coin,,...,,,Metal,,http://www.metmuseum.org/art/collection/search/5,,,"Metropolitan Museum of Art, New York, NY",,


In [5]:
# Show every column label in the loaded table.
met.columns

Index(['Object Number', 'Is Highlight', 'Is Public Domain', 'Is Timeline Work',
       'Object ID', 'Department', 'AccessionYear', 'Object Name', 'Title',
       'Culture', 'Period', 'Dynasty', 'Reign', 'Portfolio', 'Artist Role',
       'Artist Prefix', 'Artist Display Name', 'Artist Display Bio',
       'Artist Suffix', 'Artist Alpha Sort', 'Artist Nationality',
       'Artist Begin Date', 'Artist End Date', 'Artist Gender',
       'Artist ULAN URL', 'Artist Wikidata URL', 'Object Date',
       'Object Begin Date', 'Object End Date', 'Medium', 'Dimensions',
       'Credit Line', 'Geography Type', 'City', 'State', 'County', 'Country',
       'Region', 'Subregion', 'Locale', 'Locus', 'Excavation', 'River',
       'Classification', 'Rights and Reproduction', 'Link Resource',
       'Object Wikidata URL', 'Metadata Date', 'Repository', 'Tags',
       'Tags AAT URL'],
      dtype='object')

## Removing Columns
Removed unnecessary columns and columns with a large number of null entries.

In [6]:
# First pass of column removal: labels listed here are discarded from `met`.
remove = [
    'Object Number',
    'Artist Prefix',
    'Portfolio',
    'Artist Suffix',
    'Artist ULAN URL',
    'Artist Wikidata URL',
    'Artist Alpha Sort',
    'Geography Type',
    'Excavation',
    'River',
    'Classification',
    'Rights and Reproduction',
    'Link Resource',
    'Object Wikidata URL',
    'Metadata Date',
    'Repository',
    'Tags AAT URL',
]

# NOTE: `met` is rebound here, so re-running this cell without reloading the
# data raises KeyError because the columns are already gone.
met = met.drop(columns=remove)

# Preview the slimmer table.
met.head()

Unnamed: 0,Is Highlight,Is Public Domain,Is Timeline Work,Object ID,Department,AccessionYear,Object Name,Title,Culture,Period,...,Credit Line,City,State,County,Country,Region,Subregion,Locale,Locus,Tags
0,False,False,False,1,The American Wing,1979,Coin,One-dollar Liberty Head Coin,,,...,"Gift of Heinz L. Stoppelmann, 1979",,,,,,,,,
1,False,False,False,2,The American Wing,1980,Coin,Ten-dollar Liberty Head Coin,,,...,"Gift of Heinz L. Stoppelmann, 1980",,,,,,,,,
2,False,False,False,3,The American Wing,1967,Coin,Two-and-a-Half Dollar Coin,,,...,"Gift of C. Ruxton Love Jr., 1967",,,,,,,,,
3,False,False,False,4,The American Wing,1967,Coin,Two-and-a-Half Dollar Coin,,,...,"Gift of C. Ruxton Love Jr., 1967",,,,,,,,,
4,False,False,False,5,The American Wing,1967,Coin,Two-and-a-Half Dollar Coin,,,...,"Gift of C. Ruxton Love Jr., 1967",,,,,,,,,


In [7]:
# Per-column count of missing values in the remaining columns.
print(met.isna().sum())

Is Highlight                0
Is Public Domain            0
Is Timeline Work            0
Object ID                   0
Department                  0
AccessionYear            4156
Object Name              1936
Title                   29894
Culture                268151
Period                 385000
Dynasty                451247
Reign                  463207
Artist Role            204041
Artist Display Name    201947
Artist Display Bio     241101
Artist Nationality     291952
Artist Begin Date      238471
Artist End Date        238473
Artist Gender          373500
Object Date             14998
Object Begin Date           0
Object End Date             0
Medium                   7153
Dimensions              75485
Credit Line               417
City                   442624
State                  471855
County                 466143
Country                399296
Region                 443048
Subregion              452327
Locale                 458773
Locus                  466986
Tags      

In [8]:
# (rows, columns) of the table at this point in the notebook.
met.shape

(474383, 34)

In [9]:
# Second pass of column removal: period, biography, and geography columns.
# 'Artist Nationality' and 'Artist Gender' were flagged as candidates too
# (marked with '?' in the working notes) but are kept for now.
blank = [
    'Period',
    'Dynasty',
    'Reign',
    'Artist Display Bio',
    'City',
    'State',
    'County',
    'Country',
    'Region',
    'Subregion',
    'Locale',
    'Locus',
]

# NOTE: `met` is rebound here, so re-running this cell without reloading the
# data raises KeyError because the columns are already gone.
met = met.drop(columns=blank)

# Preview the table after the second pass.
met.head()

Unnamed: 0,Is Highlight,Is Public Domain,Is Timeline Work,Object ID,Department,AccessionYear,Object Name,Title,Culture,Artist Role,...,Artist Begin Date,Artist End Date,Artist Gender,Object Date,Object Begin Date,Object End Date,Medium,Dimensions,Credit Line,Tags
0,False,False,False,1,The American Wing,1979,Coin,One-dollar Liberty Head Coin,,Maker,...,1794.0,1869.0,,1853,1853,1853,Gold,Dimensions unavailable,"Gift of Heinz L. Stoppelmann, 1979",
1,False,False,False,2,The American Wing,1980,Coin,Ten-dollar Liberty Head Coin,,Maker,...,1785.0,1844.0,,1901,1901,1901,Gold,Dimensions unavailable,"Gift of Heinz L. Stoppelmann, 1980",
2,False,False,False,3,The American Wing,1967,Coin,Two-and-a-Half Dollar Coin,,,...,,,,1909–27,1909,1927,Gold,Diam. 11/16 in. (1.7 cm),"Gift of C. Ruxton Love Jr., 1967",
3,False,False,False,4,The American Wing,1967,Coin,Two-and-a-Half Dollar Coin,,,...,,,,1909–27,1909,1927,Gold,Diam. 11/16 in. (1.7 cm),"Gift of C. Ruxton Love Jr., 1967",
4,False,False,False,5,The American Wing,1967,Coin,Two-and-a-Half Dollar Coin,,,...,,,,1909–27,1909,1927,Gold,Diam. 11/16 in. (1.7 cm),"Gift of C. Ruxton Love Jr., 1967",


## Removed Duplicate Entries

In [12]:
# Remove rows that are exact duplicates of an earlier row (all columns equal),
# keeping the first occurrence. `duplicated().any()` answers "is there at
# least one duplicate?" from the boolean mask alone, without first building a
# filtered copy of every duplicated row just to count it.
if met.duplicated().any():
    met = met.drop_duplicates(keep='first')