In [1]:
import requests
import matplotlib.pyplot as plt
import csv
import pandas as pd
import plotly.express as px


def writeToFile(papers_by_year, save_path, field_names):
    """Write a ``{year: paper_count}`` mapping to a CSV file.

    The file contains one header row (``field_names``) followed by one row
    per dictionary entry, in the dictionary's iteration order.

    Args:
        papers_by_year: Mapping whose keys and values become the two columns.
        save_path: Destination path; an existing file is overwritten.
        field_names: Sequence of column names written as the header row.
    """
    # newline="" hands line-ending control to the csv module, which avoids
    # doubled carriage returns on platforms that translate "\n".
    with open(save_path, "w", newline="") as file:
        csvWriter = csv.writer(file, delimiter=",")
        csvWriter.writerow(field_names)
        # Data rows go through the same writer as the header so delimiter,
        # quoting and line endings are consistent (no stray space after the
        # comma, no mix of "\r\n" and "\n").
        csvWriter.writerows(papers_by_year.items())


def fetchDeepfakePapers(query, start_year=2000, end_year=2022):
    """Count papers matching ``query`` for each publication year.

    One request per year is sent to the Semantic Scholar paper-search
    endpoint, and the ``total`` field of each JSON response is recorded.

    Args:
        query: Search string. It is inserted into the URL verbatim, so it
            must already be URL-ready (e.g. ``"deepfake+ethic"``).
        start_year: First year to query (inclusive).
        end_year: Last year to query (inclusive).

    Returns:
        dict mapping each queried year (int) to the reported paper total.

    Raises:
        RuntimeError: If any request returns a status other than 200.
    """
    papers_by_year = {}
    for year in range(start_year, end_year + 1):
        result = requests.get(
            "https://api.semanticscholar.org/graph/v1/paper/search?query={}&year={}".format(
                query, year
            ),
            # Without a timeout a stalled connection would hang the cell forever.
            timeout=30,
        )

        # Check the numeric status rather than the reason phrase, which a
        # server is free to omit or change.
        if result.status_code != 200:
            print("Error: ", result.status_code, result.reason)
            # Stop here instead of trying to parse an error body as results.
            raise RuntimeError(
                "Request for {} papers in {} failed: {} {}".format(
                    query, year, result.status_code, result.reason
                )
            )

        paper_records = result.json()
        num_papers = paper_records["total"]
        print("Total number of {} papers in {}: {}".format(query, year, num_papers))
        papers_by_year[year] = num_papers

    # Return only after every year has been queried.
    return papers_by_year


def plotData(file, title):
    """Plot every non-"Year" column of a CSV file as a line over "Year".

    Args:
        file: Path to a CSV file that has a "Year" column plus one or more
            value columns.
        title: Title shown above the figure.
    """
    df = pd.read_csv(file)
    # "Year" is the x axis; passing it in y as well would plot it against itself.
    value_columns = [column for column in df.columns if column != "Year"]
    fig = px.line(df, x="Year", y=value_columns, title=title)
    fig.show()

In [6]:
# CSV paths used by the cells below (relative to the working directory).
deepfake_file = "num_deepfake_papers_by_year.csv"
deepfake_ethics_file = "num_deepfake_ethics_papers_by_year.csv"
# NOTE(review): none of the cells visible here writes this file; it is only
# read (plotting and normalisation). Presumably it must already exist on disk
# before those cells run — confirm how it is produced.
combined_file = "combined_data.csv"

# Fetch data

In [None]:
# Fetch paper counts for the search term "deepfake"; the result is a dict keyed by year.
deepfake_papers_by_year = fetchDeepfakePapers("deepfake")

In [None]:
# Same fetch for the two-term query; the string is placed in the URL as-is,
# so "+" presumably acts as the URL-encoded space between the terms.
deepfake_ethics_papers_by_year = fetchDeepfakePapers("deepfake+ethic")

# Write files

In [None]:
# Save the "deepfake" counts as a CSV with a Year/NumPapers header.
writeToFile(deepfake_papers_by_year, save_path=deepfake_file, field_names=["Year", "NumPapers"])

In [None]:
# Save the "deepfake+ethic" counts as a CSV with the same Year/NumPapers header.
writeToFile(deepfake_ethics_papers_by_year, save_path=deepfake_ethics_file, field_names=["Year", "NumPapers"])

# Plot all data

In [None]:
# Line chart of the CSV written for the "deepfake" query.
plotData(deepfake_file, "Number of Deepfake Paper Publications Over Time")

In [None]:
# Line chart of the CSV written for the "deepfake+ethic" query.
plotData(deepfake_ethics_file, "Number of Deepfake Paper Publications With Ethical Considerations Over Time")

In [None]:
# Line chart of the combined CSV (read from disk, not built by a visible cell).
# NOTE(review): the title is identical to the ethics-only plot above — confirm
# whether the combined chart should carry its own title.
plotData(
    combined_file,
    "Number of Deepfake Paper Publications With Ethical Considerations Over Time",
)

# Normalize combined data

In [16]:
# pandas is already imported in the first cell; repeating it here is harmless.
import pandas as pd
from sklearn import preprocessing
import numpy as np

# Load the combined counts and keep only the two count columns.
df = pd.read_csv(combined_file)
df = df[["NumDeepfakePapers", "NumDeepfakeEthicsPapers"]] # column subset; still a DataFrame at this point
print("Original data", df)
# Min-max scaling rescales each column independently (default target range [0, 1]).
min_max_scaler = preprocessing.MinMaxScaler()
# fit_transform hands back an array rather than a DataFrame, so column labels are gone.
dfScaled = min_max_scaler.fit_transform(df)
print("Normalized data", dfScaled)

# Writes values only (no header row, no Year column).
# NOTE(review): the file name is spelled "combinded"; left unchanged here
# because anything that reads this file would depend on the exact name.
np.savetxt("combinded_data_normalized.csv", dfScaled, delimiter=",")

Original data     NumDeepfakePapers  NumDeepfakeEthicsPapers
0             3194825                  3195467
1             3385868                  3386666
2             3667076                  3667889
3             3991471                  3992497
4             4526587                  4527751
5             4880192                  4881486
6             5236653                  5238099
7             5659088                  5660620
8             6123537                  6125139
9             6594526                  6596070
10            7038482                  7040315
11            7467256                  7469109
12            7820983                  7822787
13            8206144                  8207971
14            8419942                  8421936
15            8649157                  8651075
16            8700248                  8702273
17            8255706                  8257552
18            8205111                  8206978
19            8366227                  8368076