<a href="https://colab.research.google.com/github/MinKimIP/IPA-public/blob/master/data_request/2020-03-17%20Cayman%20Islands%20Trade%20Mark.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Request

27 February 2020

The customer asks whether SA2 (Statistical Area Level 2) and LGA (Local Government Area) information can be obtained from IPGOD 2019.

---

This data request will be answered using [IPGOD 2019](https://data.gov.au/data/dataset/intellectual-property-government-open-data-2019).

We note that IPGOD 2019 has many data quality issues.

## Scripts

Run the cell below without any changes.

In [0]:
import pandas as pd
import plotly.express as px

# data sources

def ip_data(ip_type, table):
    """Download one IPGOD 2019 table from data.gov.au as a DataFrame.

    Parameters
    ----------
    ip_type : str
        Which IP right to load: 'patent', 'trademark' or 'design'.
    table : str
        Which table of that IP right to load: 'process', 'applicant'
        or 'classification'.

    Returns
    -------
    pandas.DataFrame
        The CSV contents after being passed through ``parse_dates``.

    Raises
    ------
    KeyError
        If ``ip_type`` or ``table`` is not one of the keys listed above.
        The message names the valid choices.
    """
    url_base = 'https://data.gov.au/data/dataset/a4210de2-9cbb-4d43-848d-46138fefd271/resource/'
    url = {'patent': {'process': '8fa6db74-a461-47f1-acc6-2e0cf7f06bd5/download/ipgod107.csv',
                      'applicant': '846990df-db42-4ad7-bbd6-567fd37a2797/download/ipgod102.csv',
                      'classification': '5aeec421-dddc-4c22-a66a-bfc5ad22947f/download/ipgod104.csv'},
           'trademark': {'process': '4dec358e-14ff-45ef-8b3e-b27274347e23/download/ipgod203.csv',
                         'applicant': 'aae1c14d-f8c0-4540-b5d3-1ed21500271e/download/ipgod202.csv',
                         'classification': 'fb505762-ab2a-4f56-999d-9bedd1da2ad5/download/ipgod204.csv'},
           'design': {'process': '9003a068-82fd-410d-a193-d54b8bc1f171/download/ipgod303.csv',
                      'applicant': '4b802e80-c667-4b84-8f50-72c2624c59c1/download/ipgod302.csv',
                      'classification': 'b01f7e00-a718-4e2d-9ffb-14938fd7dba9/download/ipgod304.csv'}}

    # Validate before any download so a typo fails immediately with a message
    # that lists the valid choices instead of a bare KeyError echoing the key.
    if ip_type not in url:
        raise KeyError('unknown ip_type {!r}; expected one of {}'.format(ip_type, sorted(url)))
    if table not in url[ip_type]:
        raise KeyError('unknown table {!r} for ip_type {!r}; expected one of {}'.format(
            table, ip_type, sorted(url[ip_type])))

    # low_memory=False reads the file in one pass rather than in chunks.
    df = pd.read_csv(url_base + url[ip_type][table], low_memory=False)
    df = parse_dates(df)

    return df


# Key column name for each IP right type. 'tm_number' is the column the trade
# mark tables are merged on; the patent and design entries are presumably the
# equivalent application-number columns -- confirm against those tables.
main_key = dict(
    patent='australian_appl_no',
    trademark='tm_number',
    design='application_id',
)


# pipe components

def parse_dates(df):
    """Convert every column whose name contains "date" to datetime.

    The match is a case-sensitive substring test on the column label.
    The frame is modified in place and the same object is returned, so the
    function works both in an assignment and in ``DataFrame.pipe``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose date-like columns should be converted.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with matching columns converted by
        ``pd.to_datetime``.
    """
    for column in df.columns:
        # Non-string labels (e.g. integer labels) cannot contain "date", and
        # testing them with `in` would raise TypeError, so skip them.
        if isinstance(column, str) and "date" in column:
            df[column] = pd.to_datetime(df[column])

    return df

## Get data

In [0]:
# Load the trade mark 'process' and 'applicant' tables and attach applicant
# details to each row of the process table. The left join keeps process rows
# that have no matching applicant row.
tm_applications = ip_data('trademark', 'process')
tm_applicants = ip_data('trademark', 'applicant')
tm = tm_applications.merge(tm_applicants, on='tm_number', how='left')

# Only the merged frame is used from here on; release the two source frames.
del tm_applications
del tm_applicants

# Fixed seed so the preview shows the same rows on every run.
tm.sample(n=10, random_state=0)

Unnamed: 0,tm_number,status_code_desc,type_of_mark_code,expedite_flag_ind,non_use_flag_ind,trademark_type,lodgement_date,filing_date,priority_date,registered_date,renewal_due_date,priority_number,priority_country_code,court_orders_ind,ir_number_notify_date,madrid_application_ind,ipa_id,country,australian,entity,name,cleanname,corp_desg,state,postcode,lat,lon,sa2_code,sa2_name,lga_code,lga_name,gcc_name,elect_div,abn,acn
614611,1094046,Removed - Not Renewed : Renewal fee not paid,Trade Mark,False,False,Figurative,2006-01-11,2006-01-11,2006-01-11,2006-01-11,2016-01-11,,,False,NaT,False,247886.0,AU,True,True,Valve Tech Engineering Pty Ltd,VALVE TECH ENGINEERING,PTY LTD,VIC,3056.0,-37.767,144.953,206011105.0,Brunswick,25250.0,Moreland (C),Greater Melbourne,Wills,22055160000.0,55161674.0
1569890,951688,Registered : Registered/protected,Trade Mark,False,False,Word,2003-04-28,2003-04-28,2003-04-28,2003-04-28,2023-04-28,,,False,NaT,False,196626.0,AU,True,True,The NTF Group Pty Ltd,NTF GROUP,PTY LTD,NSW,2000.0,-33.866,151.201,117031337.0,Sydney - Haymarket - The Rocks,17200.0,Sydney (C),Greater Sydney,Sydney,60070860000.0,70857279.0
732166,1107180,Lapsed : Not accepted,Trade Mark,False,False,Figurative,2006-04-05,2006-04-05,2006-04-05,NaT,NaT,,,False,NaT,False,401260.0,US,False,True,"Brown Mechanic, Inc. a California corporation",BROWN MECHANIC,INC,,,,,,,,,,,,
82195,1253947,Registered : Expired renewal possible,Trade Mark,False,False,Word,2008-07-29,2008-07-29,2008-07-29,2008-07-29,2018-07-29,,,False,NaT,False,154263.0,AU,True,True,Loyalty Marketing Pty Ltd,LOYALTY MARKETING,PTY LTD,VIC,3141.0,-37.839,144.994,206061137.0,South Yarra - East,26350.0,Stonnington (C),Greater Melbourne,Higgins,70635020000.0,
1061639,1823553,Registered : Registered/protected,Trade Mark,True,False,Figurative,2017-02-02,2017-02-02,2017-02-02,2017-02-02,2027-02-02,,,False,NaT,False,945957.0,AU,True,True,PTJP Online Pty Ltd as trustee for PTJP Online...,PTJP ONLINE,PTY LTD,TAS,7250.0,-41.433,147.155,602011042.0,Newstead,64010.0,Launceston (C),Rest of Tas.,Bass,53730700000.0,
740915,1254328,Protected : Registered/protected,Trade Mark,True,False,Word,2008-01-29,2008-07-31,2007-11-02,2008-01-29,2028-01-29,RN2007C000325,IT,False,2008-07-31,True,64372.0,IT,False,True,S.M.R.E. S.p.A.,SMRE,SPA,,,,,,,,,,,,
521580,733646,Removed - Not Renewed : Renewal fee not paid,Trade Mark,False,False,Figurative,1997-04-30,1997-04-30,1997-04-30,1997-04-30,2007-04-30,,,False,NaT,False,227499.0,AU,True,True,Texi-Pave NSW Pty Ltd,TEXIPAVE NSW,PTY LTD,NSW,2752.0,-33.893,150.604,124031465.0,Warragamba - Silverdale,18400.0,Wollondilly (A),Greater Sydney,Hume,54073010000.0,73011702.0
880566,1268114,Lapsed : Not accepted,Trade Mark,False,False,Figurative,2008-10-20,2008-10-20,2008-10-20,NaT,NaT,,,False,NaT,False,286802.0,AU,True,True,Cole Classic Incorporated,COLE CLASSIC,INC,NSW,2025.0,-33.887,151.237,118011347.0,Woollahra,18500.0,Woollahra (A),Greater Sydney,Wentworth,69171470000.0,
1508963,1787078,Registered : Registered/protected,Trade Mark,False,False,Word,2016-08-02,2016-08-02,2016-08-02,2016-08-02,2026-08-02,,,False,NaT,False,1019496.0,AU,True,True,Golden Produce l.P. Pty Ltd,GOLDEN PRODUCE,PTY LTD,VIC,3123.0,-37.822,145.046,207011152.0,Hawthorn East,21110.0,Boroondara (C),Greater Melbourne,Kooyong,,
1045304,1566853,Lapsed : Not accepted,Trade Mark,False,False,Fancy,2013-07-04,2013-07-04,2013-07-04,NaT,NaT,,,False,NaT,False,376207.0,AU,True,True,Australian Pork Limited,AUSTRALIAN PORK,LTD,ACT,2600.0,-35.309,149.138,801061129.0,Barton,89399.0,Unincorporated ACT,Australian Capital Territory,Canberra,83092780000.0,92783278.0


## Manipulate the data, ready for aggregation

In [0]:
# Inclusive range of lodgement years kept for this request.
FIRST_YEAR = 2000
LAST_YEAR = 2018

df = (
    tm[['tm_number', 'lodgement_date', 'sa2_name', 'lga_name']]
    # Drop rows missing any of the four fields; this also guarantees the year
    # extracted below is never NaN, so the cast to int cannot fail.
    .dropna()
    .assign(lodgement_year=lambda x: x['lodgement_date'].dt.year.astype(int))
    .drop(columns='lodgement_date')
    # between() is inclusive at both ends.
    .loc[lambda x: x['lodgement_year'].between(FIRST_YEAR, LAST_YEAR)]
    # One row per distinct (tm_number, sa2_name, lga_name, lodgement_year).
    .drop_duplicates()
)

# save for downloading
df.to_csv('trademark.csv', index=False, encoding='utf-8')

# Fixed seed so the preview shows the same rows on every run.
df.sample(n=10, random_state=0)

Unnamed: 0,tm_number,sa2_name,lga_name,lodgement_year
52309,1299677,Homebush Bay - Silverwater,Parramatta (C),2009
530112,1678446,Willunga,Onkaparinga (C),2015
1606750,868641,Brunswick Heads - Ocean Shores,Byron (A),2001
245669,1802167,Macquarie Park - Marsfield,Ryde (C),2016
1308846,945326,Rockdale - Banksia,Bayside (A),2003
900339,1945501,Woolner - Bayview - Winnellie,Darwin (C),2018
1445334,1578401,Sydney - Haymarket - The Rocks,Sydney (C),2013
1265700,1226361,Horsley Park - Kemps Creek,Fairfield (C),2008
1001345,1521498,Crows Nest - Waverton,North Sydney (A),2012
1613089,1336188,Ballina Region,Ballina (A),2009


In [0]:
# Number of distinct trade marks per SA2 and lodgement year.
# `df` is de-duplicated on (tm_number, sa2_name, lga_name, lodgement_year), so
# a trade mark listed under one SA2 with more than one LGA name occupies
# several rows. nunique() counts it once; a plain row count would not.
by_sa2 = (
    df.groupby(['sa2_name', 'lodgement_year'])['tm_number']
      .nunique()
      .reset_index()
)

px.line(
    by_sa2,
    x="lodgement_year",
    y="tm_number",
    color="sa2_name",
    title="Trade marks lodged by SA2 and year",
    # The y column holds a count, not trade mark numbers; label it as such.
    labels={"lodgement_year": "Lodgement year",
            "tm_number": "Trade marks lodged",
            "sa2_name": "SA2"},
)

In [0]:
# Number of distinct trade marks per LGA and lodgement year.
# `df` is de-duplicated on (tm_number, sa2_name, lga_name, lodgement_year), so
# a trade mark listed under one LGA with more than one SA2 name occupies
# several rows. nunique() counts it once; a plain row count would not.
by_lga = (
    df.groupby(['lga_name', 'lodgement_year'])['tm_number']
      .nunique()
      .reset_index()
)

px.line(
    by_lga,
    x="lodgement_year",
    y="tm_number",
    color="lga_name",
    title="Trade marks lodged by LGA and year",
    # The y column holds a count, not trade mark numbers; label it as such.
    labels={"lodgement_year": "Lodgement year",
            "tm_number": "Trade marks lodged",
            "lga_name": "LGA"},
)