# Patent Data Dashboard Examples
This notebook demonstrates how to create various interactive charts using pandas and Plotly Express in Jupyter.

In [17]:
# 1. Imports and Data Loading
import pandas as pd
import plotly.express as px

# Load the dataset (adjust path as needed)
df = pd.read_csv(r'D:\Semester 4\Data Visualization\DataVisualizer\backend\data\example\CollumnFilterOECDdatasetWith50samples.csv')
# Inspect columns
print(df.columns)

Index(['Reference area', 'Technology domain', 'TIME_PERIOD'], dtype='object')


In [18]:
#rename columns TIME_PERIOD to Year
df.rename(columns={'TIME_PERIOD': 'Year'}, inplace=True)
#rename columns Reference area to Country
df.rename(columns={'Reference area': 'Country'}, inplace=True)
# Rename columns Technology domain to TechDomain
df.rename(columns={'Technology domain': 'TechDomain'}, inplace=True)


## Helper function for filtering

In [19]:
# Filter function: year could be int or (start, end), countries list, domains list
def filter_data(df, year=None, year_range=None, countries=None, domains=None):
    df_f = df.copy()
    if year is not None:
        df_f = df_f[df_f['Year'] == year]
    if year_range is not None:
        start, end = year_range
        df_f = df_f[(df_f['Year'] >= start) & (df_f['Year'] <= end)]
    if countries is not None:
        df_f = df_f[df_f['Country'].isin(countries)]
    if domains is not None:
        df_f = df_f[df_f['TechDomain'].isin(domains)]
    return df_f

## 1) Bar chart by Country for a given year or range

In [None]:
# Parameters
year = 2020              # single year; set to None if using range
year_range = None        # e.g., (2018, 2022) for specific range
selected_countries = ['USA', 'Japan', 'Germany']  # or None for all
data1 = filter_data(df, year=year, year_range=year_range, countries=selected_countries)

# Plot
fig1 = px.bar(
    data1,
    x='Country',
    y='NumPatents',
    title=f'Number of Patents by Country ({year if year else str(year_range[0]) + "–" + str(year_range[1])})',
    labels={'NumPatents': 'Number of Patents'}
)
fig1.show()

ValueError: Value of 'y' is not the name of a column in 'data_frame'. Expected one of ['Country', 'TechDomain', 'Year'] but received: NumPatents

: 

## 2) Grouped bar chart by Technology Domain over years

In [None]:
# Parameters
year = None
year_range = (2015, 2020)
selected_domain = ['Biotech', 'AI']  # list of domains or None for all
data2 = filter_data(df, year=year, year_range=year_range, domains=selected_domain)

# Plot grouped bar
fig2 = px.bar(
    data2,
    x='Year',
    y='NumPatents',
    color='TechDomain',
    barmode='group',
    title=f'Patents by Tech Domain {year_range[0]}–{year_range[1]}',
    labels={'NumPatents': 'Number of Patents'}
)
fig2.show()

## 3) Line chart: Patents over Years (single series)

In [None]:
# Parameters
year_range = (2010, 2020)
data3 = filter_data(df, year_range=year_range)
# Aggregate if multiple countries/domains
agg3 = data3.groupby('Year')['NumPatents'].sum().reset_index()

fig3 = px.line(
    agg3,
    x='Year',
    y='NumPatents',
    title=f'Total Patents Over Years {year_range[0]}–{year_range[1]}',
    markers=True
)
fig3.show()

## 4) Multi-line chart: Each Country over Years

In [None]:
# Parameters
year_range = (2010, 2020)
selected_countries = ['USA', 'China', 'Germany']
data4 = filter_data(df, year_range=year_range, countries=selected_countries)

# Aggregate per country-year
agg4 = data4.groupby(['Year', 'Country'])['NumPatents'].sum().reset_index()

fig4 = px.line(
    agg4,
    x='Year',
    y='NumPatents',
    color='Country',
    title=f'Patents by Country Over Years {year_range[0]}–{year_range[1]}',
    markers=True
)
fig4.show()