<a href="https://colab.research.google.com/github/MinKimIP/IPA-public/blob/master/data_request/2020-03-17%20Cayman%20Islands%20Trade%20Mark.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Request

17 March 2020

The customer asks: "Grateful if we could extract some of the leading Cayman Islands applicants and their trade mark classes."

---

This data request will be answered using [IPGOD 2019](https://data.gov.au/data/dataset/intellectual-property-government-open-data-2019).

## Scripts

Run the cell below without any changes.

In [0]:
import pandas as pd
import numpy as np

# data sources

def ip_data(ip_type, table):
    """Download one IPGOD table from data.gov.au and return it as a DataFrame.

    Parameters
    ----------
    ip_type : str
        One of 'patent', 'trademark' or 'design'.
    table : str
        One of 'process', 'applicant' or 'classification'.

    Returns
    -------
    pandas.DataFrame
        The CSV contents after being passed through ``parse_dates``.

    Raises
    ------
    KeyError
        If ``ip_type`` or ``table`` is not one of the keys listed above.
    """
    url_base = 'https://data.gov.au/data/dataset/a4210de2-9cbb-4d43-848d-46138fefd271/resource/'

    # Resource path (relative to url_base) for each IP right and table.
    resource_paths = {
        'patent': {
            'process': '8fa6db74-a461-47f1-acc6-2e0cf7f06bd5/download/ipgod107.csv',
            'applicant': '846990df-db42-4ad7-bbd6-567fd37a2797/download/ipgod102.csv',
            'classification': '5aeec421-dddc-4c22-a66a-bfc5ad22947f/download/ipgod104.csv',
        },
        'trademark': {
            'process': '4dec358e-14ff-45ef-8b3e-b27274347e23/download/ipgod203.csv',
            'applicant': 'aae1c14d-f8c0-4540-b5d3-1ed21500271e/download/ipgod202.csv',
            'classification': 'fb505762-ab2a-4f56-999d-9bedd1da2ad5/download/ipgod204.csv',
        },
        'design': {
            'process': '9003a068-82fd-410d-a193-d54b8bc1f171/download/ipgod303.csv',
            'applicant': '4b802e80-c667-4b84-8f50-72c2624c59c1/download/ipgod302.csv',
            'classification': 'b01f7e00-a718-4e2d-9ffb-14938fd7dba9/download/ipgod304.csv',
        },
    }

    resource = resource_paths[ip_type][table]
    # low_memory=False makes pandas read the file in one pass instead of
    # in chunks when inferring column dtypes.
    frame = pd.read_csv(url_base + resource, low_memory=False)
    return parse_dates(frame)


# Name of the application-identifier column for each IP right.
main_key = {
    'patent': 'australian_appl_no',
    'trademark': 'tm_number',
    'design': 'application_id',
}


# pipe components

def parse_dates(df):
    """Convert every column whose name contains "date" to datetime.

    The conversion is applied in place: ``df`` itself is modified and then
    returned, so the function can be used as a pipeline step.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose date-like columns should be parsed.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with matching columns converted by
        ``pd.to_datetime``.
    """
    for column in df.columns:
        # Column labels are not guaranteed to be strings (e.g. integer
        # labels); a substring test on those would raise TypeError, so
        # they are skipped.
        if isinstance(column, str) and "date" in column:
            df[column] = pd.to_datetime(df[column])

    return df

## Get data

In [11]:
# Applications (number and lodgement date) joined to their applicants.
# The left join keeps applications that have no applicant row.
tm_applications = ip_data('trademark', 'process')[['tm_number', 'lodgement_date']]
tm_applicants = ip_data('trademark', 'applicant')[['tm_number', 'ipa_id', 'name', 'country']]
tm = pd.merge(tm_applications, tm_applicants, on='tm_number', how='left')
# Drop the source frames once merged so they can be garbage-collected.
del tm_applications, tm_applicants

# One row per trade mark with one indicator column per class code
# (nice_class_<code>): de-duplicate (tm_number, class_code) pairs, one-hot
# encode the class code, then sum the indicators within each trade mark.
tm_classifications = ip_data('trademark', 'classification')[['tm_number', 'class_code']]
tm_classifications = tm_classifications.drop_duplicates()
tm_classifications = pd.get_dummies(tm_classifications, columns=['class_code'], prefix='nice_class')
tm_classifications = tm_classifications.groupby(['tm_number']).sum().reset_index()

# Attach the class indicators; trade marks without a classification row
# get NaN in every nice_class_* column.
tm = pd.merge(tm, tm_classifications, on='tm_number', how='left')
del tm_classifications

# Show a random sample of the combined table.
tm.sample(n=10)

Unnamed: 0,tm_number,lodgement_date,ipa_id,name,country,nice_class_1,nice_class_2,nice_class_3,nice_class_4,nice_class_5,nice_class_6,nice_class_7,nice_class_8,nice_class_9,nice_class_10,nice_class_11,nice_class_12,nice_class_13,nice_class_14,nice_class_15,nice_class_16,nice_class_17,nice_class_18,nice_class_19,nice_class_20,nice_class_21,nice_class_22,nice_class_23,nice_class_24,nice_class_25,nice_class_26,nice_class_27,nice_class_28,nice_class_29,nice_class_30,nice_class_31,nice_class_32,nice_class_33,nice_class_34,nice_class_35,nice_class_36,nice_class_37,nice_class_38,nice_class_39,nice_class_40,nice_class_41,nice_class_42,nice_class_43,nice_class_44,nice_class_45
1452350,926957,2002-09-12,587691.0,non-entity,AU,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1311350,302150,1976-11-12,319836.0,Edgar Edmondson Pty Ltd,AU,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
451561,501151,1988-12-08,183758.0,Myer Stores Limited,AU,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
127640,545017,1990-10-31,64110.0,Smith & Nephew Inc a Delaware corporation,US,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
690653,1736211,2015-11-20,70915.0,DEPARTMENT OF HEALTH WA,AU,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
656643,643867,1994-10-24,227862.0,ADI Limited,AU,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
810328,1198296,2007-09-11,761673.0,non-entity,AU,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
732888,875845,2001-05-17,253210.0,VITACO HEALTH AUSTRALIA PTY LIMITED,AU,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
93642,219002,1968-05-09,98862.0,"Heublein, Inc.",US,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
941074,724734,1996-12-24,194753.0,North East Equity Pty Ltd,AU,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Manipulate the data, ready for aggregation

In [27]:
# Keep only rows whose applicant country is "KY" and whose lodgement year is
# between 2011 and 2018 inclusive. The lodgement date is reduced to a year,
# ipa_id is cast to int, and exact duplicate rows are removed.
df = (tm.query('country=="KY"')
        .assign(lodgement_year = lambda x: x['lodgement_date'].dt.year.astype(int))
        .drop(columns=['lodgement_date', 'country'])
        .query('lodgement_year >= 2011')
        .query('lodgement_year <= 2018')
        .assign(ipa_id = lambda x: x['ipa_id'].astype(int))
        .drop_duplicates())

nice_class_columns = [column for column in df.columns if 'class' in column]

# The class indicators come from a left merge, so trade marks without a
# classification row hold NaN; treat those as 0 and store as integers.
for column in nice_class_columns:
    df[column] = df[column].fillna(0).astype(int)

# Per (year, applicant): number of rows, the last applicant name seen, and
# the per-class totals.
# NOTE(review): 'count' counts rows, not distinct tm_number values. If one
# application can appear in several rows for the same ipa_id, 'nunique' may
# be what is intended - confirm against the applicant table.
agg_dict = {'tm_number': 'count',
            'name': 'last'}

# Use the string 'sum' rather than np.sum: passing the NumPy callable to
# groupby.agg is deprecated in recent pandas and emits a FutureWarning,
# while the string selects the same groupby sum on every version.
agg_dict.update({column: 'sum' for column in nice_class_columns})

# Most recent year first; within a year, largest applicants first.
(df
 .groupby(['lodgement_year', 'ipa_id']).agg(agg_dict)
 .rename(columns={'tm_number': 'trade_mark_application_count'})
 .sort_values(['lodgement_year', 'trade_mark_application_count'], ascending=False))

Unnamed: 0_level_0,Unnamed: 1_level_0,trade_mark_application_count,name,nice_class_1,nice_class_2,nice_class_3,nice_class_4,nice_class_5,nice_class_6,nice_class_7,nice_class_8,nice_class_9,nice_class_10,nice_class_11,nice_class_12,nice_class_13,nice_class_14,nice_class_15,nice_class_16,nice_class_17,nice_class_18,nice_class_19,nice_class_20,nice_class_21,nice_class_22,nice_class_23,nice_class_24,nice_class_25,nice_class_26,nice_class_27,nice_class_28,nice_class_29,nice_class_30,nice_class_31,nice_class_32,nice_class_33,nice_class_34,nice_class_35,nice_class_36,nice_class_37,nice_class_38,nice_class_39,nice_class_40,nice_class_41,nice_class_42,nice_class_43,nice_class_44,nice_class_45
lodgement_year,ipa_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1
2018,947375,8,ONEMT Group Holding Limited,0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,3,0,0,0
2018,385917,5,"BeiGene, Ltd.",0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2018,226366,4,Tencent Holdings Limited,1,0,0,0,0,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0
2018,356230,3,Alibaba Group Holding Limited,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,2,2,0,1,1,0,2,2,0,0,0
2018,1005935,3,National Currency eXchange Group Limited (an E...,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,3,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2011,204034,1,Oro Agri International Ltd.,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2011,337993,1,FIORINO CORP,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2011,351069,1,advema IPM Ltd.,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,1,1,0,0,1
2011,916376,1,ARICENT,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0
