In [17]:
#importing libraries
import pandas as pd
import numpy as np
import plotly.express as px

In [18]:
# importing data
article_data = pd.read_csv("articles.leptospirosis.csv")
author_data = pd.read_csv("authors.leptospirosis.csv")
paper_counts = pd.read_csv("paper_counts.csv")

In [19]:
# taking out Year column to article_count dataframe
article_count = article_data["Year"].reset_index()
article_count.shape

(3389, 2)

In [20]:
## adding new column countLeptopsyrosis

# creates array of '1's
count = [i/i for i in range (1, article_count.shape[0]+1)] 

# adding array (new column) to dataframe
article_count['CountLeptopsyrosis'] = count

# grouping by year and adding articles published in those years
article_count_data = article_count.groupby('Year').agg('sum').reset_index()

article_count_data


Unnamed: 0,Year,index,CountLeptopsyrosis
0,1944,646,1.0
1,1945,644,1.0
2,1960,499,2.0
3,1961,3934,4.0
4,1963,249,1.0
5,1964,882,2.0
6,1970,1943,4.0
7,1976,1995,2.0
8,1977,1488,1.0
9,1981,1015,1.0


In [21]:
## line plot of Count of articles related to Leptopsyrosis

plot = px.line(
    # ignoring 2024 data as it is incomplete
    article_count_data.iloc[0:46, :],
    x = 'Year',
    y = 'CountLeptopsyrosis',
    title = "Count of articles related to Leptospyrosis",
    labels = {'CountLeptopsyrosis': 'Article Count'}, 
    line_shape = 'linear', 
    template = 'plotly', 
    color_discrete_sequence=['red'],
)

# Show the plot
plot.show()

In [22]:
## Histogram of Number of articles related to Leptopsyrosis

plot = px.histogram(
    article_data, 
    x = 'Year',
    title = "Count of articles related to Lepropsyrosis"    
)
plot.show()

In [23]:
paper_counts.head()

Unnamed: 0,Year,Count
0,1799,1
1,1801,1
2,1802,1
3,1805,1
4,1866,1


In [24]:
paper_counts.tail()

Unnamed: 0,Year,Count
138,2020,1132241
139,2021,1277048
140,2022,1324650
141,2023,1302390
142,2024,15611


In [25]:
# Joining article_count_data data to paper_counts data
new_data = pd.merge(paper_counts, article_count_data, on = 'Year', how = 'left')

# Taking selected columns for graph
merged_counts = new_data[['Year', 'Count', 'CountLeptopsyrosis']]

In [26]:
# copying dataframe
merged_counts_copy = merged_counts.copy()

# Replacing Nan values with zeros (zero articles)
merged_counts_copy['CountLeptopsyrosis'] = merged_counts_copy['CountLeptopsyrosis'].fillna(0)
merged_counts_copy

Unnamed: 0,Year,Count,CountLeptopsyrosis
0,1799,1,0.0
1,1801,1,0.0
2,1802,1,0.0
3,1805,1,0.0
4,1866,1,0.0
...,...,...,...
138,2020,1132241,202.0
139,2021,1277048,207.0
140,2022,1324650,214.0
141,2023,1302390,177.0


In [27]:
# Adding column with fraction

merged_counts_copy['FractionLepropsyrosis'] = merged_counts_copy['CountLeptopsyrosis']/merged_counts_copy['Count']


In [28]:
#line plot of fraction
plot_fraction = px.line(
    
    #taking whole data (except 2024)
    merged_counts_copy.iloc[0:142, :],
    x = 'Year',
    y = 'FractionLepropsyrosis',
    title="Fraction of articles related to Leptospyrosis (1799-2023)",
    labels={'FractionLepropsyrosis': 'Fraction of article related to Leptospyrosis'},
    line_shape='linear', 
    #template='plotly', 
    color_discrete_sequence=['red'],
    
)
plot_fraction.show()


In [29]:
#line plot of fraction
plot_fraction = px.line(
    # taking data from 1971 to 2023
    merged_counts_copy.iloc[89:142, :],
    x = 'Year',
    y = 'FractionLepropsyrosis',
    title="Fraction of articles related to Leptospyrosis (1971-2023)",
    labels={'FractionLepropsyrosis': 'Fraction of article related to Leptospyrosis'},
    line_shape='linear', 
    #template='plotly', 
    color_discrete_sequence=['red']
)    

# updating y axis ticks
plot_fraction.update_layout(
    yaxis = dict(
        showexponent = 'all',
        exponentformat = 'power'
    ))
    

plot_fraction.show()