In [None]:
import numpy as np
import pandas as pd
import re
from collections import Counter
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
def rename_cols(df, master_name):
  """Rename the leading columns of ``df`` positionally to ``master_name``.

  The i-th column of ``df`` receives the i-th entry of ``master_name``;
  columns beyond ``len(master_name)`` keep their labels. ``df`` itself is
  left untouched and a renamed copy is returned. The labels before and
  after the rename are printed as a visual check.

  Raises IndexError when ``df`` has fewer columns than ``master_name``
  has entries.
  """
  old_labels = df.columns
  mapping = {old_labels[pos]: target for pos, target in enumerate(master_name)}
  renamed = df.rename(columns=mapping)

  # Validation printout: one item per line, before vs. after.
  print(
      "column names before changes:",
      old_labels,
      " ",
      "column names after changes:",
      renamed.columns,
      sep="\n",
  )

  return renamed

# Trim datasets into a manageable resource

The objective of this step is to trim all files to a common column structure, so that the data is more manageable to process. The following steps are performed in this process:

- create the set of required columns for data processing
- trim each scraped file so that it contains only the required columns
- join all datasets into one file (saved as an Excel workbook) for title assessment


In [None]:
# Target schema: every source export is trimmed and renamed to these columns,
# in this order. The DataFrame index serves as RecordID, a unique identifier
# for each record that is valid only within this database.
master_col = [
    "Author",       # author(s) of the study, as written for citation export
    "Title",        # title of the study
    "Title_2",      # second title of the study (if any), e.g. a foreign title
    "Record_link",  # raw link of the record
    "DOI",          # Digital Object Identifier of the record
    "Year",         # publication year of the study
]

# TRID Files


---




exploration of target columns

In [None]:
master_col

['Author', 'Title', 'Title_2', 'Record_link', 'DOI', 'Year']

In [None]:
# Load only the eight per-author columns of the TRID export.
trid_author_cols = [
    "Author1", "Author2", "Author3", "Author4",
    "Author5", "Author6", "Author7", "Author8",
]
TRID_authors = pd.read_csv("/content/TRID-CSV_2024-05-21.csv", usecols=trid_author_cols)


In [None]:
# Join the non-missing author fields of each row into one ";"-separated string.
# The source columns are named explicitly: selecting TRID_authors.columns would
# also pick up "All_Authors" once it exists, so re-running the cell would append
# the previous result to itself and repeat every name.
author_cols = ["Author1", "Author2", "Author3", "Author4",
               "Author5", "Author6", "Author7", "Author8"]
TRID_authors["All_Authors"] = TRID_authors[author_cols].apply(
    lambda row: ";".join(row.dropna().astype(str)),
    axis=1,
)
TRID_author_complied = TRID_authors["All_Authors"]

In [None]:
# NOTE(review): this list is not referenced again in the visible notebook, and
# "Author_all" does not match the "All_Authors" column built above; confirm
# whether it is still needed.
useful_colsTRID = ["Title", "Author_all"]

In [None]:
# Record-level fields of the TRID export.
# read_csv ignores the order of `usecols` and returns columns in file order,
# while the rename to the master schema further down is positional. Indexing
# with the same list pins the order explicitly, as the Google Scholar and
# Scopus sections already do with their "reorder" step.
trid_record_cols = ["Title", "ForeignTitle", "Record URL1", "Record URL2", "Publication date"]
TRID_filtered = pd.read_csv("/content/TRID-CSV_2024-05-21.csv",
                            usecols=trid_record_cols)[trid_record_cols]

In [None]:
TRID_filtered.head(3)

Unnamed: 0,Title,ForeignTitle,Record URL1,Record URL2,Publication date
0,Enhanced Safety of Heavy-Duty Vehicles on High...,,https://doi.org/10.4271/2024-01-1964,,2024-04-09
1,Restricted Speed Enforcement for Positive Trai...,,https://railroads.dot.gov/sites/fra.dot.gov/fi...,https://rosap.ntl.bts.gov/view/dot/74200,2024-04-00
2,"On Speed Management, Public Health, and Risky ...",,https://doi.org/10.1177/03611981231182419,,2024-03-00


In [None]:
# Place the combined author string next to the record fields, side by side.
trid_parts = (TRID_author_complied, TRID_filtered)
TRID_join = pd.concat(trid_parts, axis="columns")
TRID_join.head(2)

Unnamed: 0,All_Authors,Title,ForeignTitle,Record URL1,Record URL2,Publication date
0,"Shiledar, Ankur;Sujan, Vivek;Siekmann, Adam;Yu...",Enhanced Safety of Heavy-Duty Vehicles on High...,,https://doi.org/10.4271/2024-01-1964,,2024-04-09
1,"Liu, Xiang;Holt, Keith;Zhang, Zhipeng;Liu, Xia...",Restricted Speed Enforcement for Positive Trai...,,https://railroads.dot.gov/sites/fra.dot.gov/fi...,https://rosap.ntl.bts.gov/view/dot/74200,2024-04-00


In [None]:
# Take the year (text before the first "-") from the publication date.
# The nullable "string" dtype keeps missing dates missing; a plain astype(str)
# would turn them into the literal text "nan", which then ends up as a Year.
publication_date = TRID_join["Publication date"].astype("string")
TRID_join["Year"] = publication_date.str.split("-").str[0]
TRID_join.head(3)

Unnamed: 0,All_Authors,Title,ForeignTitle,Record URL1,Record URL2,Publication date,Year
0,"Shiledar, Ankur;Sujan, Vivek;Siekmann, Adam;Yu...",Enhanced Safety of Heavy-Duty Vehicles on High...,,https://doi.org/10.4271/2024-01-1964,,2024-04-09,2024
1,"Liu, Xiang;Holt, Keith;Zhang, Zhipeng;Liu, Xia...",Restricted Speed Enforcement for Positive Trai...,,https://railroads.dot.gov/sites/fra.dot.gov/fi...,https://rosap.ntl.bts.gov/view/dot/74200,2024-04-00,2024
2,"Valderrama, Segundo Lopez;Palacios, Manuel San...","On Speed Management, Public Health, and Risky ...",,https://doi.org/10.1177/03611981231182419,,2024-03-00,2024


In [None]:
# The date column is no longer needed once Year has been extracted.
# Reassigning (instead of inplace=True) and tolerating an already-missing
# column keeps this cell safe to re-run without a KeyError.
TRID_join = TRID_join.drop(columns="Publication date", errors="ignore")
TRID_join

Unnamed: 0,All_Authors,Title,ForeignTitle,Record URL1,Record URL2,Year
0,"Shiledar, Ankur;Sujan, Vivek;Siekmann, Adam;Yu...",Enhanced Safety of Heavy-Duty Vehicles on High...,,https://doi.org/10.4271/2024-01-1964,,2024
1,"Liu, Xiang;Holt, Keith;Zhang, Zhipeng;Liu, Xia...",Restricted Speed Enforcement for Positive Trai...,,https://railroads.dot.gov/sites/fra.dot.gov/fi...,https://rosap.ntl.bts.gov/view/dot/74200,2024
2,"Valderrama, Segundo Lopez;Palacios, Manuel San...","On Speed Management, Public Health, and Risky ...",,https://doi.org/10.1177/03611981231182419,,2024
3,"Thapa, Diwas;Mishra, Sabyasachee;Khattak, Asad...",Assessing driver behavior in work zones: A dis...,,https://doi.org/10.1016/j.aap.2023.107427,http://www.sciencedirect.com/science/article/p...,2024
4,;,Effectiveness of Automated Speed Enforcement i...,,,,
...,...,...,...,...,...,...
490,"HOOK, P;HOOK, P;HOOK, P;HOOK, P",A PICTURE OF ACCURACY,,,,2003
491,"Smith, G;SENSERRICK, T M;Smith, G;SENSERRICK, ...",NEW SPEED ENFORCEMENT INITIATIVES: CHANGING AT...,,,,2003
492,"CHAMPNESS, P;FOLKMAN, L;CHAMPNESS, P;FOLKMAN, ...",THE IMPACT OF SPEED CAMERAS ON SPEED-RELATED C...,,,,2003
493,"Lave, C;Elias, P;Lave, C;Elias, P;Lave, C;Elia...",RESOURCE ALLOCATION IN PUBLIC POLICY: THE EFFE...,,,,2003


In [None]:
# Positional mapping of the current TRID columns onto the master schema.
# NOTE(review): by position this maps "Record URL2" to "DOI", while the doi.org
# links shown in the output above sit in "Record URL1"; confirm this is intended.
rename_dict = {TRID_join.columns[pos]: target for pos, target in enumerate(master_col)}
TRID_join.rename(columns=rename_dict, inplace=True)

In [None]:
TRID_join.head(4)

Unnamed: 0,Author,Title,Title_2,Record_link,DOI,Year
0,"Shiledar, Ankur;Sujan, Vivek;Siekmann, Adam;Yu...",Enhanced Safety of Heavy-Duty Vehicles on High...,,https://doi.org/10.4271/2024-01-1964,,2024
1,"Liu, Xiang;Holt, Keith;Zhang, Zhipeng;Liu, Xia...",Restricted Speed Enforcement for Positive Trai...,,https://railroads.dot.gov/sites/fra.dot.gov/fi...,https://rosap.ntl.bts.gov/view/dot/74200,2024
2,"Valderrama, Segundo Lopez;Palacios, Manuel San...","On Speed Management, Public Health, and Risky ...",,https://doi.org/10.1177/03611981231182419,,2024
3,"Thapa, Diwas;Mishra, Sabyasachee;Khattak, Asad...",Assessing driver behavior in work zones: A dis...,,https://doi.org/10.1016/j.aap.2023.107427,http://www.sciencedirect.com/science/article/p...,2024


In [None]:
# Persist the cleaned TRID records. The path is relative (current working directory).
# With the default index=True the row index is written as the first sheet column,
# which shows up as "Unnamed: 0" when the merge section reads this file back.
TRID_join.to_excel("TRID_cleaned_495.xlsx")

# Google Scholar files


---



exploration of target columns

In [None]:
master_col

['Author', 'Title', 'Title_2', 'Record_link', 'DOI', 'Year']

In [None]:
# Load only the Google Scholar columns that feed the master schema.
gg_source_cols = ["Authors", "Title", "Source", "ArticleURL", "FullTextURL", "Year"]
GG_filtered = pd.read_csv("/content/PoP-GGScholarResult.csv", usecols=gg_source_cols)

In [None]:
# Fix the column order for the positional rename that follows.
# .copy() makes GG_reorder an independent frame, so the later in-place rename
# and the Year assignment do not trigger SettingWithCopyWarning on a slice of
# GG_filtered.
GG_reorder = GG_filtered[["Authors", "Title", "Source", "ArticleURL", "FullTextURL", "Year"]].copy()
GG_reorder.head(3)

Unnamed: 0,Authors,Title,Source,ArticleURL,FullTextURL,Year
0,"S Shaheen, CJ Rodier, E Cavanagh",Automated speed enforcement in the US: a revie...,,https://escholarship.org/uc/item/41k1k365,https://escholarship.org/content/qt41k1k365/qt...,2007.0
1,"CJ Rodier, SA Shaheen, E Cavanagh",Automated speed enforcement in the US: a revie...,… Research Board 87 th …,https://www.academia.edu/download/75396981/Aut...,https://www.academia.edu/download/75396981/Aut...,2007.0
2,AA Siregar,How can speed enforcement be made more effecti...,,https://etheses.whiterose.ac.uk/20653/,https://etheses.whiterose.ac.uk/20653/1/Sirega...,2018.0


In [None]:
# Positional mapping of the Google Scholar columns onto the master schema.
# NOTE(review): by position "Source" becomes Title_2 and "FullTextURL" becomes
# DOI; confirm that these stand-ins are intended.
rename_GG = {GG_reorder.columns[pos]: target for pos, target in enumerate(master_col)}
GG_reorder.rename(columns=rename_GG, inplace=True)
GG_reorder.head(3)

Unnamed: 0,Author,Title,Title_2,Record_link,DOI,Year
0,"S Shaheen, CJ Rodier, E Cavanagh",Automated speed enforcement in the US: a revie...,,https://escholarship.org/uc/item/41k1k365,https://escholarship.org/content/qt41k1k365/qt...,2007.0
1,"CJ Rodier, SA Shaheen, E Cavanagh",Automated speed enforcement in the US: a revie...,… Research Board 87 th …,https://www.academia.edu/download/75396981/Aut...,https://www.academia.edu/download/75396981/Aut...,2007.0
2,AA Siregar,How can speed enforcement be made more effecti...,,https://etheses.whiterose.ac.uk/20653/,https://etheses.whiterose.ac.uk/20653/1/Sirega...,2018.0


In [None]:
GG_reorder.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 359 entries, 0 to 358
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Author       359 non-null    object 
 1   Title        359 non-null    object 
 2   Title_2      264 non-null    object 
 3   Record_link  359 non-null    object 
 4   DOI          255 non-null    object 
 5   Year         344 non-null    float64
dtypes: float64(1), object(5)
memory usage: 17.0+ KB


In [None]:
# Year is float64 because some records have no year (344 of 359 non-null above).
# Going through the nullable Int64 dtype removes the ".0" without inventing
# values: astype(str).str[:4] would turn every missing year into the text "nan".
# .assign builds a new frame, so no value is set on a slice of GG_filtered.
GG_reorder = GG_reorder.assign(
    Year=GG_reorder["Year"].astype("Int64").astype("string")
)
GG_reorder.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 359 entries, 0 to 358
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Author       359 non-null    object
 1   Title        359 non-null    object
 2   Title_2      264 non-null    object
 3   Record_link  359 non-null    object
 4   DOI          255 non-null    object
 5   Year         359 non-null    object
dtypes: object(6)
memory usage: 17.0+ KB


In [None]:
GG_reorder

Unnamed: 0,Author,Title,Title_2,Record_link,DOI,Year
0,"S Shaheen, CJ Rodier, E Cavanagh",Automated speed enforcement in the US: a revie...,,https://escholarship.org/uc/item/41k1k365,https://escholarship.org/content/qt41k1k365/qt...,2007
1,"CJ Rodier, SA Shaheen, E Cavanagh",Automated speed enforcement in the US: a revie...,… Research Board 87 th …,https://www.academia.edu/download/75396981/Aut...,https://www.academia.edu/download/75396981/Aut...,2007
2,AA Siregar,How can speed enforcement be made more effecti...,,https://etheses.whiterose.ac.uk/20653/,https://etheses.whiterose.ac.uk/20653/1/Sirega...,2018
3,Z Abouchacra,Automated Speed Enforcement as a Mechanism of ...,,https://ruor.uottawa.ca/handle/10393/45429,https://ruor.uottawa.ca/bitstream/10393/45429/...,2023
4,SA Al Ramadhani,A gap analysis of the automated speed enforcem...,,https://eprints.qut.edu.au/134252,https://eprints.qut.edu.au/134252/1/Saif_Al%20...,2019
...,...,...,...,...,...,...
354,I TAUFIQURRAHMAN,Design dan Implementasi Speed Gun Berbasis Radar,,https://repository.telkomuniversity.ac.id/pust...,,2019
355,N Dharma Kusuma,IMPLEMENTASI TILANG ATAS PELANGGARAN BATAS KEC...,,http://repository.upnjatim.ac.id/id/eprint/742,http://repository.upnjatim.ac.id/742/1/Cover.pdf,2020
356,P Marks,Speed gun with a twist,New scientist,https://dialnet.unirioja.es/servlet/articulo?c...,,2013
357,A Gasruddin,Studi Kecepatan Kendaraan Menggunakan Alat Apl...,Jurnal MEDIA INOVASI Teknik Sipil …,https://www.ejournal.lppmunidayan.ac.id/index....,https://www.ejournal.lppmunidayan.ac.id/index....,2017


In [None]:
# Persist the cleaned Google Scholar records (relative path; the row index is
# written as the first sheet column because index=True is the default).
GG_reorder.to_excel("GGScholar_cleaned_359.xlsx")

# Scopus Data


---



In [None]:
# Load the full Scopus export to inspect which columns are available.
# Note: the name scopus_df is rebound to the cleaned workbook in the merge section.
scopus_df = pd.read_csv("/content/scopus_Paper.csv")
scopus_df.head(3)

Unnamed: 0,Authors,Author full names,Author(s) ID,Title,Year,Source title,DOI,Link,Abstract
0,Agina S.; Momeni Rad F.; El-Basyouny K.,"Agina, Samaa (57355267500); Momeni Rad, Faeze ...",57355267500; 58772868300; 14832701400,Linking Deployment Outcomes to Program Impacts...,2023,Safety,10.3390/safety9040088,https://www.scopus.com/inward/record.uri?eid=2...,Previous research has delved into the effectiv...
1,Ammar K.; Al-Emami A.; Baher A.,"Ammar, Khalid (57213686839); Al-Emami, Abdulla...",57213686839; 57245283200; 57245189000,Real-time Vehicle Speed Enforcement System,2021,2021 10th Mediterranean Conference on Embedded...,10.1109/MECO52532.2021.9460297,https://www.scopus.com/inward/record.uri?eid=2...,Traffic injuries and deaths are important road...
2,Carus L.; Mamaqi-Kapllani X.,"Carus, Luis (35112742100); Mamaqi-Kapllani, Xh...",35112742100; 58180739900,Managing Accident Prevention in Ski Resorts: P...,2023,International Journal of Environmental Researc...,10.3390/ijerph20075302,https://www.scopus.com/inward/record.uri?eid=2...,Velocity is one of the main factors affecting ...


In [None]:
scopus_df.columns

Index(['Authors', 'Author full names', 'Author(s) ID', 'Title', 'Year',
       'Source title', 'DOI', 'Link', 'Abstract'],
      dtype='object')

In [None]:
master_col

['Author', 'Title', 'Title_2', 'Record_link', 'DOI', 'Year']

In [None]:
# Re-read the Scopus export, keeping only the columns that feed the master schema.
scopus_source_cols = ["Authors", "Title", "Source title", "Link", "DOI", "Year"]
scopus_select = pd.read_csv("/content/scopus_Paper.csv", usecols=scopus_source_cols)

In [None]:
scopus_select.head(4)

Unnamed: 0,Authors,Title,Year,Source title,DOI,Link
0,Agina S.; Momeni Rad F.; El-Basyouny K.,Linking Deployment Outcomes to Program Impacts...,2023,Safety,10.3390/safety9040088,https://www.scopus.com/inward/record.uri?eid=2...
1,Ammar K.; Al-Emami A.; Baher A.,Real-time Vehicle Speed Enforcement System,2021,2021 10th Mediterranean Conference on Embedded...,10.1109/MECO52532.2021.9460297,https://www.scopus.com/inward/record.uri?eid=2...
2,Carus L.; Mamaqi-Kapllani X.,Managing Accident Prevention in Ski Resorts: P...,2023,International Journal of Environmental Researc...,10.3390/ijerph20075302,https://www.scopus.com/inward/record.uri?eid=2...
3,Braden A.W.; Parker I.D.; Lopez R.R.; Silvy N.J.,Temporal Movement Patterns Predict Collisions ...,2020,Southeastern Naturalist,10.1656/058.019.0409,https://www.scopus.com/inward/record.uri?eid=2...


In [None]:
# read_csv returned the columns in file order; put them in master-schema order
# so the positional rename that follows lines up.
scopus_col_order = ["Authors", "Title", "Source title", "Link", "DOI", "Year"]
scopus_reorder = scopus_select[scopus_col_order]
scopus_reorder.head(3)

Unnamed: 0,Authors,Title,Source title,Link,DOI,Year
0,Agina S.; Momeni Rad F.; El-Basyouny K.,Linking Deployment Outcomes to Program Impacts...,Safety,https://www.scopus.com/inward/record.uri?eid=2...,10.3390/safety9040088,2023
1,Ammar K.; Al-Emami A.; Baher A.,Real-time Vehicle Speed Enforcement System,2021 10th Mediterranean Conference on Embedded...,https://www.scopus.com/inward/record.uri?eid=2...,10.1109/MECO52532.2021.9460297,2021
2,Carus L.; Mamaqi-Kapllani X.,Managing Accident Prevention in Ski Resorts: P...,International Journal of Environmental Researc...,https://www.scopus.com/inward/record.uri?eid=2...,10.3390/ijerph20075302,2023


In [None]:
# Positional mapping of the Scopus columns onto the master schema
# ("Source title" becomes Title_2, "Link" becomes Record_link).
rename_scopus_dict = {
    scopus_reorder.columns[pos]: target for pos, target in enumerate(master_col)
}
scopus_reorder.rename(columns=rename_scopus_dict, inplace=True)
scopus_reorder.head(3)

Unnamed: 0,Author,Title,Title_2,Record_link,DOI,Year
0,Agina S.; Momeni Rad F.; El-Basyouny K.,Linking Deployment Outcomes to Program Impacts...,Safety,https://www.scopus.com/inward/record.uri?eid=2...,10.3390/safety9040088,2023
1,Ammar K.; Al-Emami A.; Baher A.,Real-time Vehicle Speed Enforcement System,2021 10th Mediterranean Conference on Embedded...,https://www.scopus.com/inward/record.uri?eid=2...,10.1109/MECO52532.2021.9460297,2021
2,Carus L.; Mamaqi-Kapllani X.,Managing Accident Prevention in Ski Resorts: P...,International Journal of Environmental Researc...,https://www.scopus.com/inward/record.uri?eid=2...,10.3390/ijerph20075302,2023


In [None]:
scopus_reorder.shape

(420, 6)

In [None]:
# Persist the cleaned Scopus records (relative path; the row index is written
# as the first sheet column because index=True is the default).
scopus_reorder.to_excel("scopus_cleaned_420.xlsx")

# PubMed Dataset

In [None]:
# Load the full PubMed export; the columns for the master schema are selected below.
PubMed_raw = pd.read_csv("/content/csv-SpeedEnfor-set.csv")
PubMed_raw.head(2)

Unnamed: 0,PMID,Title,Authors,Citation,First Author,Journal/Book,Publication Year,Create Date,PMCID,NIHMS ID,DOI
0,31656123,Speed in a high-speed society,Hydén C.,Int J Inj Contr Saf Promot. 2020 Mar;27(1):44-...,Hydén C,Int J Inj Contr Saf Promot,2020,2019/10/29,,,10.1080/17457300.2019.1680566
1,23474237,Effects of average speed enforcement on speed ...,"Soole DW, Watson BC, Fleiter JJ.",Accid Anal Prev. 2013 May;54:46-56. doi: 10.10...,Soole DW,Accid Anal Prev,2013,2013/03/12,,,10.1016/j.aap.2013.01.018


In [None]:
master_col

['Author', 'Title', 'Title_2', 'Record_link', 'DOI', 'Year']

In [None]:
# PubMed columns in master-schema order; "Citation" takes the Record_link slot
# and "Journal/Book" the Title_2 slot.
pubmed_source_cols = ["Authors", "Title", "Journal/Book", "Citation", "DOI", "Publication Year"]
Pubmed_selected = PubMed_raw[pubmed_source_cols]
Pubmed_selected.head(2)


Unnamed: 0,Authors,Title,Journal/Book,Citation,DOI,Publication Year
0,Hydén C.,Speed in a high-speed society,Int J Inj Contr Saf Promot,Int J Inj Contr Saf Promot. 2020 Mar;27(1):44-...,10.1080/17457300.2019.1680566,2020
1,"Soole DW, Watson BC, Fleiter JJ.",Effects of average speed enforcement on speed ...,Accid Anal Prev,Accid Anal Prev. 2013 May;54:46-56. doi: 10.10...,10.1016/j.aap.2013.01.018,2013


In [None]:
# Alternative selection that uses "PMCID" in the Record_link slot.
# It must come from PubMed_raw: Pubmed_selected was built without "PMCID"
# (it carries "Citation" instead), so selecting it there raises KeyError on a
# fresh kernel.
pubmed_reorder = PubMed_raw[["Authors", "Title", "Journal/Book", "PMCID", "DOI", "Publication Year"]]
pubmed_reorder.head(2)

Unnamed: 0,Authors,Title,Journal/Book,PMCID,DOI,Publication Year
0,Hydén C.,Speed in a high-speed society,Int J Inj Contr Saf Promot,,10.1080/17457300.2019.1680566,2020
1,"Soole DW, Watson BC, Fleiter JJ.",Effects of average speed enforcement on speed ...,Accid Anal Prev,,10.1016/j.aap.2013.01.018,2013


In [None]:
# rename_cols is defined once in the helper cell at the top of the notebook; it
# is deliberately not redefined here, so there is a single definition to maintain.
# NOTE(review): this renames Pubmed_selected (which has "Citation" in the
# Record_link slot), not pubmed_reorder (which has "PMCID"); confirm which
# one is intended.
pubmed_rename = rename_cols(Pubmed_selected, master_col)

column names before changes:
Index(['Authors', 'Title', 'Journal/Book', 'Citation', 'DOI',
       'Publication Year'],
      dtype='object')
 
column names after changes:
Index(['Author', 'Title', 'Title_2', 'Record_link', 'DOI', 'Year'], dtype='object')


In [None]:
pubmed_rename.head(3)

Unnamed: 0,Author,Title,Title_2,Record_link,DOI,Year
0,Hydén C.,Speed in a high-speed society,Int J Inj Contr Saf Promot,Int J Inj Contr Saf Promot. 2020 Mar;27(1):44-...,10.1080/17457300.2019.1680566,2020
1,"Soole DW, Watson BC, Fleiter JJ.",Effects of average speed enforcement on speed ...,Accid Anal Prev,Accid Anal Prev. 2013 May;54:46-56. doi: 10.10...,10.1016/j.aap.2013.01.018,2013
2,"Zhai Z, Xu J, Song G, Hatzopoulou M.","Comparative analysis of drive-cycles, speed li...",Sci Total Environ,Sci Total Environ. 2022 Mar 10;811:152323. doi...,10.1016/j.scitotenv.2021.152323,2022


In [None]:
# Persist the cleaned PubMed records (relative path; the row index is written
# as the first sheet column because index=True is the default).
pubmed_rename.to_excel("Pubmed_cleaned_74.xlsx")

# IEEE General

In [None]:
# Load the full IEEE Xplore export; the columns for the master schema are selected below.
ie3x_raw = pd.read_csv("/content/export2024.05.21-16.32.43.csv")
ie3x_raw.head(3)

Unnamed: 0,Document Title,Authors,Author Affiliations,Publication Title,Date Added To Xplore,Publication Year,Volume,Issue,Start Page,End Page,...,Mesh_Terms,Article Citation Count,Patent Citation Count,Reference Count,License,Online Date,Issue Date,Meeting Date,Publisher,Document Identifier
0,A traffic speed enforcement system for high sp...,Kuan-Lin Chiu; C. -C. Lin; S. D. Gupta; Ching-...,"ICT Design & Validation for Vehicle Dep., Indu...",16th International IEEE Conference on Intellig...,30 Jan 2014,2013,,,1292,1297,...,,,,15.0,IEEE,30 Jan 2014,,,IEEE,IEEE Conferences
1,Scheduling resources in a mobile photo enforce...,Y. Li; A. Kim; K. El-Basyouny,Department of Civil and Environmental Engineer...,2017 4th International Conference on Transport...,21 Sep 2017,2017,,,645,652,...,,1.0,,13.0,IEEE,21 Sep 2017,,,IEEE,IEEE Conferences
2,Vehicle Speed Enforcement Using Absolute Speed...,M. Mandava; R. S. Gammenthaler; S. F. Hocker,"Applied Concepts, Inc., Richardson, TX, USA; A...",2018 IEEE 88th Vehicular Technology Conference...,14 Apr 2019,2018,,,1,5,...,,3.0,,14.0,IEEE,14 Apr 2019,,,IEEE,IEEE Conferences


In [None]:
ie3x_raw.columns

Index(['Document Title', 'Authors', 'Author Affiliations', 'Publication Title',
       'Date Added To Xplore', 'Publication Year', 'Volume', 'Issue',
       'Start Page', 'End Page', 'Abstract', 'ISSN', 'ISBNs', 'DOI',
       'Funding Information', 'PDF Link', 'Author Keywords', 'IEEE Terms',
       'Mesh_Terms', 'Article Citation Count', 'Patent Citation Count',
       'Reference Count', 'License', 'Online Date', 'Issue Date',
       'Meeting Date', 'Publisher', 'Document Identifier'],
      dtype='object')

In [None]:
master_col

['Author', 'Title', 'Title_2', 'Record_link', 'DOI', 'Year']

In [None]:
# IEEE columns in master-schema order.
# NOTE(review): "Author Keywords" lands in the Title_2 slot here, whereas the
# Scopus, PubMed and Springer sections put a source/journal title there and
# this export also has "Publication Title"; confirm this choice.
ie3_source_cols = ["Authors", "Document Title", "Author Keywords", "PDF Link",
                   "DOI", "Publication Year"]
ie3_filtered = ie3x_raw[ie3_source_cols]
ie3_filtered.head(2)

Unnamed: 0,Authors,Document Title,Author Keywords,PDF Link,DOI,Publication Year
0,Kuan-Lin Chiu; C. -C. Lin; S. D. Gupta; Ching-...,A traffic speed enforcement system for high sp...,DSRC;IEEE 802.11p;Speed Enforcement;Speed Camera,https://ieeexplore.ieee.org/stamp/stamp.jsp?ar...,10.1109/ITSC.2013.6728409,2013
1,Y. Li; A. Kim; K. El-Basyouny,Scheduling resources in a mobile photo enforce...,resource scheduling;mobile photo enforcement;i...,https://ieeexplore.ieee.org/stamp/stamp.jsp?ar...,10.1109/ICTIS.2017.8047835,2017


In [None]:
# Rename the six selected IEEE columns positionally to the master schema;
# rename_cols returns a new frame and prints the labels before and after.
ie3_rename = rename_cols(ie3_filtered, master_col)
ie3_rename.head(2)

column names before changes:
Index(['Authors', 'Document Title', 'Author Keywords', 'PDF Link', 'DOI',
       'Publication Year'],
      dtype='object')
 
column names after changes:
Index(['Author', 'Title', 'Title_2', 'Record_link', 'DOI', 'Year'], dtype='object')


Unnamed: 0,Author,Title,Title_2,Record_link,DOI,Year
0,Kuan-Lin Chiu; C. -C. Lin; S. D. Gupta; Ching-...,A traffic speed enforcement system for high sp...,DSRC;IEEE 802.11p;Speed Enforcement;Speed Camera,https://ieeexplore.ieee.org/stamp/stamp.jsp?ar...,10.1109/ITSC.2013.6728409,2013
1,Y. Li; A. Kim; K. El-Basyouny,Scheduling resources in a mobile photo enforce...,resource scheduling;mobile photo enforcement;i...,https://ieeexplore.ieee.org/stamp/stamp.jsp?ar...,10.1109/ICTIS.2017.8047835,2017


In [None]:
ie3_rename.shape

(47, 6)

In [None]:
# Persist the cleaned IEEE records (relative path; the row index is written
# as the first sheet column because index=True is the default).
ie3_rename.to_excel("ie3_cleaned_47.xlsx")

# Springer dataset

In [None]:
# Load the full Springer export; the columns for the master schema are selected below.
springer_data = pd.read_csv("/content/SearchResults (1).csv")
springer_data.head(2)

Unnamed: 0,Item Title,Publication Title,Book Series Title,Journal Volume,Journal Issue,Item DOI,Authors,Publication Year,URL,Content Type
0,Robust Vehicle Speed Estimation Based on Visio...,Intelligent Computing and Optimization,,,,10.1007/978-3-031-36246-0_4,Dea Angelia KamilWahyonoAgus Harjoko,2023,http://link.springer.com/chapter/10.1007/978-3...,Chapter
1,Impact of the decision-making environment on p...,Canadian Journal of Public Health,,108.0,5 - 6,10.17269/CJPH.108.6231,Curt J. Pankratz PhD,2017,http://link.springer.com/article/10.17269/CJPH...,Article


In [None]:
springer_data.columns

Index(['Item Title', 'Publication Title', 'Book Series Title',
       'Journal Volume', 'Journal Issue', 'Item DOI', 'Authors',
       'Publication Year', 'URL', 'Content Type'],
      dtype='object')

In [None]:
# Springer columns in master-schema order ("Publication Title" takes the
# Title_2 slot, "URL" the Record_link slot).
springer_source_cols = ["Authors", "Item Title", "Publication Title",
                        "URL", "Item DOI", "Publication Year"]
springer_filtered = springer_data[springer_source_cols]
springer_filtered.head(2)

Unnamed: 0,Authors,Item Title,Publication Title,URL,Item DOI,Publication Year
0,Dea Angelia KamilWahyonoAgus Harjoko,Robust Vehicle Speed Estimation Based on Visio...,Intelligent Computing and Optimization,http://link.springer.com/chapter/10.1007/978-3...,10.1007/978-3-031-36246-0_4,2023
1,Curt J. Pankratz PhD,Impact of the decision-making environment on p...,Canadian Journal of Public Health,http://link.springer.com/article/10.17269/CJPH...,10.17269/CJPH.108.6231,2017


In [None]:
master_col

['Author', 'Title', 'Title_2', 'Record_link', 'DOI', 'Year']

In [None]:
# Rename the six selected Springer columns positionally to the master schema;
# rename_cols returns a new frame and prints the labels before and after.
springer_rename = rename_cols(springer_filtered, master_col)

column names before changes:
Index(['Authors', 'Item Title', 'Publication Title', 'URL', 'Item DOI',
       'Publication Year'],
      dtype='object')
 
column names after changes:
Index(['Author', 'Title', 'Title_2', 'Record_link', 'DOI', 'Year'], dtype='object')


In [None]:
springer_rename.shape

(172, 6)

In [None]:
# Persist the cleaned Springer records (relative path; the row index is written
# as the first sheet column because index=True is the default).
springer_rename.to_excel("springer_cleaned_172.xlsx")

# Merging documents into one database

In [None]:
# Re-load the cleaned per-source workbooks.
# The to_excel cells above write these files with relative paths (current
# working directory), so they are read back from the same relative paths. A
# hard-coded "/content/" prefix only works when the working directory happens
# to be /content.
trid_df = pd.read_excel("TRID_cleaned_495.xlsx")
gg_df = pd.read_excel("GGScholar_cleaned_359.xlsx")
scopus_df = pd.read_excel("scopus_cleaned_420.xlsx")
pubmed_df = pd.read_excel("Pubmed_cleaned_74.xlsx")
ie3_df = pd.read_excel("ie3_cleaned_47.xlsx")
springer_df = pd.read_excel("springer_cleaned_172.xlsx")

In [None]:
# Stack the six cleaned sources row-wise; ignore_index discards the per-source
# row labels and gives the result a fresh 0..n-1 index.
cleaned_sources = [scopus_df, gg_df, pubmed_df, trid_df, springer_df, ie3_df]
final_df = pd.concat(cleaned_sources, axis="index", ignore_index=True)
final_df.head(3)

Unnamed: 0.1,Unnamed: 0,Author,Title,Title_2,Record_link,DOI,Year
0,0,Agina S.; Momeni Rad F.; El-Basyouny K.,Linking Deployment Outcomes to Program Impacts...,Safety,https://www.scopus.com/inward/record.uri?eid=2...,10.3390/safety9040088,2023.0
1,1,Ammar K.; Al-Emami A.; Baher A.,Real-time Vehicle Speed Enforcement System,2021 10th Mediterranean Conference on Embedded...,https://www.scopus.com/inward/record.uri?eid=2...,10.1109/MECO52532.2021.9460297,2021.0
2,2,Carus L.; Mamaqi-Kapllani X.,Managing Accident Prevention in Ski Resorts: P...,International Journal of Environmental Researc...,https://www.scopus.com/inward/record.uri?eid=2...,10.3390/ijerph20075302,2023.0


In [None]:
final_df.columns[0]

'Unnamed: 0'

In [None]:
# "Unnamed: 0" holds the old per-source row numbers written by to_excel. Using
# it as the index would restart the record IDs at 0 for every source and make
# them non-unique in the merged table. Drop it and keep a fresh 0..n-1 index,
# named "index", as the record identifier.
# errors="ignore" and reset_index(drop=True) keep the cell safe to re-run.
final_df = (
    final_df
    .drop(columns="Unnamed: 0", errors="ignore")
    .reset_index(drop=True)
    .rename_axis("index")
)
assert final_df.index.is_unique, "record index must be unique"
final_df.head(3)

Unnamed: 0_level_0,Author,Title,Title_2,Record_link,DOI,Year
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,Agina S.; Momeni Rad F.; El-Basyouny K.,Linking Deployment Outcomes to Program Impacts...,Safety,https://www.scopus.com/inward/record.uri?eid=2...,10.3390/safety9040088,2023.0
1,Ammar K.; Al-Emami A.; Baher A.,Real-time Vehicle Speed Enforcement System,2021 10th Mediterranean Conference on Embedded...,https://www.scopus.com/inward/record.uri?eid=2...,10.1109/MECO52532.2021.9460297,2021.0
2,Carus L.; Mamaqi-Kapllani X.,Managing Accident Prevention in Ski Resorts: P...,International Journal of Environmental Researc...,https://www.scopus.com/inward/record.uri?eid=2...,10.3390/ijerph20075302,2023.0


In [None]:
final_df.shape

(1567, 6)

In [None]:
# Write the merged table used for title screening (relative path; the index is
# written as the first sheet column because index=True is the default).
final_df.to_excel("title_screening_final_df_1567.xlsx")