In [9]:
import requests
import matplotlib.pyplot as plt
import csv
import pandas as pd
import plotly.express as px


def writeToFile(papers_by_year, save_path, field_names):
    """Write a key/value mapping to a two-column CSV file.

    Args:
        papers_by_year: Mapping whose items become the data rows, one
            ``key,value`` row per entry, in the mapping's iteration order.
        save_path: Path of the CSV file to create. An existing file is
            overwritten.
        field_names: Sequence of column names written as the header row.
    """
    # newline="" hands line-ending control to the csv module, so the header
    # and every data row end with the same terminator on every platform.
    with open(save_path, "w", newline="") as file:
        csvWriter = csv.writer(file, delimiter=",")
        csvWriter.writerow(field_names)
        # Data rows go through the same writer as the header, which keeps the
        # delimiter and quoting consistent and avoids a stray space before
        # each value.
        csvWriter.writerows(papers_by_year.items())


def fetchDeepfakePapers(query, start_year=2000, end_year=2023, timeout=30):
    """Record the paper-search result total for ``query``, one year at a time.

    One GET request per year is sent to the Semantic Scholar Graph API
    paper-search endpoint, and the ``"total"`` field of each JSON response is
    stored.

    Args:
        query: Search string. It is inserted into the request URL verbatim
            (no percent-encoding is applied here), so callers must pass an
            already URL-safe value, e.g. ``"deepfake+ethic"``.
        start_year: First year to query (inclusive).
        end_year: Year at which to stop (exclusive, as with ``range``).
        timeout: Seconds passed to ``requests.get`` as its ``timeout``.

    Returns:
        dict mapping each queried year (int) to the reported total.

    Raises:
        RuntimeError: If any request returns a status code other than 200.
    """
    papers_by_year = {}
    for year in range(start_year, end_year):
        result = requests.get(
            "https://api.semanticscholar.org/graph/v1/paper/search?query={}&year={}".format(
                query, year
            ),
            timeout=timeout,
        )

        # Stop immediately on a failed request. Carrying on would either fail
        # later with an unrelated KeyError or record a bogus count.
        if result.status_code != 200:
            raise RuntimeError(
                "Error: {} {} while fetching {} papers for {}".format(
                    result.status_code, result.reason, query, year
                )
            )

        paper_records = result.json()
        num_papers = paper_records["total"]
        print("Total number of {} papers in {}: {}".format(query, year, num_papers))
        papers_by_year[year] = num_papers

    return papers_by_year


def plotData(file, title):
    """Read a CSV file and show it as a Plotly line chart.

    Args:
        file: Path of the CSV file to load; it must contain a "Year" column,
            which is used for the x axis.
        title: Title displayed on the figure.
    """
    frame = pd.read_csv(file)
    # All of the frame's columns are handed to Plotly as the y series.
    series_columns = frame.columns
    figure = px.line(frame, x="Year", y=series_columns, title=title)
    figure.show()

In [3]:
# CSV file names shared by the cells below.
# NOTE(review): combined_file is only ever read by the cells visible in this
# notebook; nothing shown here writes it -- confirm where it is produced.
combined_file = "combined_data.csv"
deepfake_file = "num_deepfake_papers_by_year.csv"
deepfake_ethics_file = "num_deepfake_ethics_papers_by_year.csv"

# Fetch data

In [10]:
# Per-year result totals for the plain "deepfake" search.
# NOTE(review): the recorded output reports millions of matches per year
# (e.g. 3195572 for 2000), which looks too high for this query -- verify that
# the endpoint is actually filtering on the query term.
deepfake_papers_by_year = fetchDeepfakePapers(query="deepfake")

Total number of deepfake papers in 2000: 3195572
Total number of deepfake papers in 2001: 3386495
Total number of deepfake papers in 2002: 3667697
Total number of deepfake papers in 2003: 3992280
Total number of deepfake papers in 2004: 4527191
Total number of deepfake papers in 2005: 4880496
Total number of deepfake papers in 2006: 5237360
Total number of deepfake papers in 2007: 5659861
Total number of deepfake papers in 2008: 6124292
Total number of deepfake papers in 2009: 6595595
Total number of deepfake papers in 2010: 7041065
Total number of deepfake papers in 2011: 7489358
Total number of deepfake papers in 2012: 7873383
Total number of deepfake papers in 2013: 8258101
Total number of deepfake papers in 2014: 8473786
Total number of deepfake papers in 2015: 8703265
Total number of deepfake papers in 2016: 8757283
Total number of deepfake papers in 2017: 8311604
Total number of deepfake papers in 2018: 8261492
Total number of deepfake papers in 2019: 8408090
Total number of deep

In [7]:
# Per-year result totals for the "deepfake+ethic" search; the "+" is placed
# in the request URL exactly as written.
deepfake_ethics_papers_by_year = fetchDeepfakePapers(query="deepfake+ethic")

Total number of deepfake+ethic papers in 2000: 3196212
Total number of deepfake+ethic papers in 2001: 3387292
Total number of deepfake+ethic papers in 2002: 3668510
Total number of deepfake+ethic papers in 2003: 3993304
Total number of deepfake+ethic papers in 2004: 4528357
Total number of deepfake+ethic papers in 2005: 4881791
Total number of deepfake+ethic papers in 2006: 5238802
Total number of deepfake+ethic papers in 2007: 5661386
Total number of deepfake+ethic papers in 2008: 6125894
Total number of deepfake+ethic papers in 2009: 6597140
Total number of deepfake+ethic papers in 2010: 7042901
Total number of deepfake+ethic papers in 2011: 7491219
Total number of deepfake+ethic papers in 2012: 7875216
Total number of deepfake+ethic papers in 2013: 8259951
Total number of deepfake+ethic papers in 2014: 8475799
Total number of deepfake+ethic papers in 2015: 8705214
Total number of deepfake+ethic papers in 2016: 8759346
Total number of deepfake+ethic papers in 2017: 8313493
Total numb

# Write files

In [11]:
# Persist the "deepfake" counts; plotData later reads the "Year" column.
writeToFile(
    deepfake_papers_by_year,
    deepfake_file,
    ["Year", "NumPapers"],
)

In [12]:
# Persist the "deepfake+ethic" counts with the same two-column layout.
writeToFile(
    deepfake_ethics_papers_by_year,
    deepfake_ethics_file,
    ["Year", "NumPapers"],
)

# Plot all data

In [None]:
# Line chart of the per-year "deepfake" counts.
plotData(
    file=deepfake_file,
    title="Number of Deepfake Paper Publications Over Time",
)

In [None]:
# Line chart of the per-year "deepfake+ethic" counts.
plotData(
    file=deepfake_ethics_file,
    title="Number of Deepfake Paper Publications With Ethical Considerations Over Time",
)

In [None]:
# Chart of the combined CSV. The title previously duplicated the ethics-only
# chart's title; it now says that this figure is drawn from the combined data.
plotData(
    combined_file,
    "Number of Deepfake Paper Publications, Overall vs. With Ethical Considerations, Over Time",
)

# Normalize combined data

In [None]:
import numpy as np
import pandas as pd
from sklearn import preprocessing

# Load the combined CSV and keep only the two count columns (selecting with a
# list of column names yields a DataFrame).
df = pd.read_csv(combined_file)[["NumDeepfakePapers", "NumDeepfakeEthicsPapers"]]
print("Original data", df)

# Rescale the two columns with a MinMaxScaler left at its default settings.
min_max_scaler = preprocessing.MinMaxScaler()
dfScaled = min_max_scaler.fit_transform(df)
print("Normalized data", dfScaled)

# The scaled values are written without a header row.
# NOTE(review): "combinded" in the file name looks like a typo for "combined";
# it is left unchanged because other code may read this exact path.
np.savetxt("combinded_data_normalized.csv", dfScaled, delimiter=",")

# Most cited deepfake papers

In [None]:
from S2search import S2paperWeb

# Ask for the top `numEntries` "deepfake" papers sorted by total citations.
m = S2paperWeb()
numEntries = 5
m.get("deepfake", n=numEntries, sort="total-citations")

# Resolve the nested result list once. Slicing (rather than indexing
# 0..numEntries-1) avoids an IndexError when fewer papers are returned.
papers = m.all['Results'][0]['Page']['Papers']
for entry in papers[:numEntries]:
    # `paper_id` instead of `id`, so the builtin id() is not shadowed for the
    # rest of the notebook session.
    paper_id = entry["id"]
    numCitations = entry["citationStats"]["numCitations"]
    print("Paper ", paper_id, " has ", numCitations, " citations.")


# Most prolific institutions for deepfake research

In [None]:
# Paper IDs whose authors' affiliations are tallied below.
most_cited_papers = [
    "37033b779765b5ed3b3eaaf8e1d5c5a62ff02e85",
    "2d066beb34469559e0fc5e5ab4d68dc736cfd46f",
    "300d08e8f5c310c2b194b7eb94398e480994d5cc",
    "3d26fb6e819a79b6abd4964d8d96314e74f73423",
    "2cdb1b96846609a965496236eaccb54b1790daab",
]
# Maps affiliation name -> number of (paper, author) pairs listing it.
papers_by_institution = {}
# for every prolific paper...
for paper in most_cited_papers:
    result = requests.get(
        "https://api.semanticscholar.org/graph/v1/paper/{id}/authors".format(
            id=paper,
        ),
        timeout=30,
    )

    # Stop on a failed request instead of falling through to a confusing
    # KeyError on the error payload.
    if result.status_code != 200:
        raise RuntimeError(
            "Error: {} {} while fetching authors of paper {}".format(
                result.status_code, result.reason, paper
            )
        )

    authors = result.json()["data"]
    # for every one of its authors...
    for author in authors:
        # `author_id` rather than `id`, so the builtin id() is not shadowed.
        author_id = author.get("authorId")
        if author_id is None:
            # Without an ID there is no author record to look up.
            continue

        result = requests.get(
            "https://api.semanticscholar.org/graph/v1/author/{id}?fields=affiliations".format(
                id=author_id
            ),
            timeout=30,
        )

        if result.status_code != 200:
            raise RuntimeError(
                "Error: {} {} while fetching affiliations of author {}".format(
                    result.status_code, result.reason, author_id
                )
            )

        # for every author's affiliations...
        # A missing or null "affiliations" field is treated as no affiliations.
        affiliations = result.json().get("affiliations") or []
        for a in affiliations:
            papers_by_institution[a] = papers_by_institution.get(a, 0) + 1

print(papers_by_institution)
    