In [22]:
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
from scipy.stats import linregress

In [23]:
# Read the five raw per-year CSV files; each year keeps its own frame so the
# later cells can stack them one at a time.
raw_csv_paths = (
    'data_unclean/2017.csv',
    'data_unclean/2018.csv',
    'data_unclean/2019.csv',
    'data_unclean/2021.csv',
    'data_unclean/2022.csv',
)
df_2017, df_2018, df_2019, df_2021, df_2022 = map(pd.read_csv, raw_csv_paths)

In [24]:
# Start the combined table by stacking the 2017 and 2018 frames row-wise,
# then display how many rows it holds.
merged_df = pd.concat((df_2017, df_2018), axis=0)
merged_df.shape[0]

648

In [25]:
# Append the 2019 frame beneath the rows collected so far and show the new
# running row count.
merged_df = pd.concat((merged_df, df_2019), axis=0)
merged_df.shape[0]

975

In [26]:
# Append the 2021 frame beneath the rows collected so far and show the new
# running row count.
merged_df = pd.concat((merged_df, df_2021), axis=0)
merged_df.shape[0]

1297

In [27]:
# Append the 2022 frame, completing the stacked table, and show the final
# row count before any cleaning.
merged_df = pd.concat((merged_df, df_2022), axis=0)
merged_df.shape[0]

1622

In [28]:
# Narrow the stacked table to the five raw columns the rest of the notebook
# works with (a missing column raises KeyError here).
selected_columns = ['player name', 'school', 'source', 'hilvl', 'year']
merged_df = merged_df.loc[:, selected_columns]

In [29]:
# Replace the raw headers with display-friendly titles. The assignment is
# positional, so the order here must line up with the column order of
# merged_df at this point.
display_column_names = ['Player Name', 'School', 'Source', 'High Level', 'Year']
merged_df.columns = display_column_names

In [30]:
# Discard every row that has a missing value in any of the five working
# columns.
required_columns = ['Player Name', 'School', 'Source', 'High Level', 'Year']
merged_df = merged_df.dropna(axis=0, how='any', subset=required_columns)

In [31]:
# Renumber the rows 0..n-1; the labels carried over from the stacked frames
# are thrown away rather than kept as an extra column.
merged_df = merged_df.reset_index(
    drop=True,
)

In [32]:
# Number of rows left in merged_df at this point.
merged_df.shape[0]

1572

In [33]:
# Collect the distinct School values among rows whose Source is 'HS' and
# write them out (the original row labels are written as the first CSV column).
hs_schools = merged_df[merged_df['Source'].eq('HS')]
hs_df = hs_schools[['School']].drop_duplicates()

hs_df.to_csv('data_unclean/HS_list.csv')

In [34]:
# Collect the distinct School values among rows whose Source is 'College' and
# write them out (the original row labels are written as the first CSV column).
co_schools = merged_df[merged_df['Source'].eq('College')]
co_df = co_schools[['School']].drop_duplicates()

co_df.to_csv('data_unclean/CO_list.csv')

In [35]:
# Show every row whose School is exactly 'Miami' so the entries can be
# inspected by hand.
miami_check = merged_df[merged_df['School'].eq('Miami')]
miami_check

Unnamed: 0,Player Name,School,Source,High Level,Year
547,Zach Spears,Miami,College,A-,2018.0
905,Evan McKendry,Miami,College,AAA,2019.0
953,Sam Bachman,Miami,College,MLB,2021.0
1011,Adrian Del Castillo,Miami,College,AA,2021.0
1115,Jake Smith,Miami,College,A+,2021.0
1344,Carson Palmquist,Miami,College,A+,2022.0
1378,Alex McFarlane,Miami,College,A,2022.0
1504,Jonathan Brand,Miami,College,A,2022.0
1516,Maxwell Romero,Miami,College,A,2022.0


In [36]:
# Tag three of the 'Miami' rows with the placeholder 'Miami2', which
# replaced_dict later maps to "Miami University - Oxford, Ohio" (plain 'Miami'
# maps to "University of Miami").
# The mask also requires School == 'Miami', so a same-named player from a
# different school is never relabelled and re-running the cell after the
# school names have been expanded is a no-op.
miami_ohio_players = ['Zach Spears', 'Sam Bachman', 'Jonathan Brand']
is_miami_ohio_row = (
    merged_df['Player Name'].isin(miami_ohio_players)
    & merged_df['School'].eq('Miami')
)
merged_df.loc[is_miami_ohio_row, 'School'] = 'Miami2'


In [37]:
# Display the rows for the three relabelled players to confirm their School
# value.
relabelled_players = ['Zach Spears', 'Sam Bachman', 'Jonathan Brand']
merged_df[merged_df['Player Name'].isin(relabelled_players)]

Unnamed: 0,Player Name,School,Source,High Level,Year
547,Zach Spears,Miami2,College,A-,2018.0
953,Sam Bachman,Miami2,College,MLB,2021.0
1504,Jonathan Brand,Miami2,College,A,2022.0


In [38]:
# Exact-match lookup from the short school label found in the School column to
# the full institution name. Keys must match the raw value character for
# character. 'Miami2' is the placeholder assigned earlier in the notebook to
# separate Miami (Ohio) players from University of Miami players.
# Mapped values carry no leading/trailing whitespace so they compare cleanly.
# NOTE(review): a handful of values are upper-case or look like a place rather
# than an institution (e.g. "Evansville, Indiana"); they are left unchanged
# because downstream consumers of these names are not visible here.
replaced_dict = {
    "Louisville": "University of Louisville",
    "Vanderbilt": "Vanderbilt University",
    "Virginia": "University of Virginia",
    "UC Irvine": "University of California, Irvine",
    "Missouri State": "Missouri State University",
    "North Carolina": "University of North Carolina",
    "South Carolina": "University of South Carolina",
    "Kentucky": "University of Kentucky",
    "Florida": "University of Florida",
    "Oregon": "University of Oregon",
    "Missouri": "University of Missouri",
    "Houston": "University of Houston",
    "St Col of FL": "State College of Florida Manatee-Sarasota",
    "Central Florida": "University of Central Florida",
    "LSU": "Louisiana State University",
    "Oregon State": "Oregon State University",
    "South Florida": "University of South Florida",
    "Mississippi State": "Mississippi State University",
    "Wake Forest": "Wake Forest University",
    "Cal Poly": "Cal Poly",
    "UCLA": "University of California, Los Angeles (UCLA)",
    "NC State": "North Carolina State University",
    "Texas A&M": "Texas A&M University",
    "Chipola": "Chipola College",
    "Texas": "University of Texas",
    "Loyola Marymount": "Loyola Marymount University",
    "Milwaukee": "University of Wisconsin-Milwaukee",
    # The original key has a trailing space (presumably mirroring the raw
    # data - TODO confirm); the stripped spelling is accepted as well.
    "Xavier ": "Xavier University",
    "Xavier": "Xavier University",
    "Arizona": "University of Arizona",
    "Florida State": "Florida State University",
    "Cal State Fullerton": "Cal State Fullerton",
    "Furman": "Furman University",
    "New Mexico": "University of New Mexico",
    "Iowa Western CC": "Iowa Western Community College",
    "Grayson County": "Grayson County College",
    "Arkansas": "University of Arkansas",
    "Gonzaga": "Gonzaga University",
    "Washington": "University of Washington",
    "Stanford": "Stanford University",
    "Jacksonville": "Jacksonville University",
    "San Diego": "University of San Diego",
    "UNC Charlotte": "University of North Carolina-Charlotte",
    "William and Mary": "College of William and Mary",
    "Oral Roberts": "Oral Roberts University",
    "Auburn": "Auburn University",
    "Clemson": "Clemson University",
    "St. Mary's (CA)": "St. Mary's College of California",
    "Santa Fe CC": "Santa Fe Community College",
    "St. Johns River CC": "St. Johns River State College",
    "Lipscomb": "Lipscomb University",
    "Notre Dame": "University of Notre Dame",
    "Dallas Baptist": "Dallas Baptist University",
    "Palm Beach State": "Palm Beach State College",
    "Tampa": "University of Tampa",
    "Kennesaw State": "Kennesaw State University",
    "Maryland": "University of Maryland",
    "SE Louisiana": "Southeastern Louisiana University",
    "Webster": "Webster University",
    "Mount Olive": "Mount Olive College",
    "Saint Joseph's": "Saint Joseph's University",
    "UC Riverside": "University of California-Riverside",
    "Rice": "Rice University",
    "Long Beach State": "Long Beach State University",
    "Utah": "University of Utah",
    "Texas Christian": "Texas Christian University",
    "Kent State": "Kent State University",
    "Oklahoma State": "Oklahoma State University",
    "Morehead State": "Morehead State University",
    "Old Dominion": "Old Dominion University",
    "Tennessee": "University of Tennessee",
    "Southern Miss": "University of Southern Mississippi",
    "Arkansas State": "Arkansas State University",
    "Iowa": "University of Iowa",
    "Augustana-SD": "Augustana College SD",
    "Michigan": "University of Michigan",
    "Sam Houston State": "Sam Houston State University",
    "New Mexico State": "New Mexico State University",
    "Connecticut": "University of Connecticut",
    "Seton Hall": "Seton Hall University",
    "Central Arizona College": "Central Arizona College",
    "Rider": "Rider University",
    "Villanova": "Villanova University",
    "Georgia Southern": "Georgia Southern University",
    "Lincoln Land CC": "Lincoln Land Community College",
    "Mercer County CC": "Mercer County Community College",
    "Minnesota": "University of Minnesota",
    "Alabama-Birmingham": "University of Alabama-Birmingham",
    "Texas Tech": "Texas Tech University",
    "Wabash Valley College": "Wabash Valley College",
    "University at Buffalo": "University at Buffalo",
    "CSUN": "Cal State Northridge",
    "Monroe CC": "Monroe Community College",
    "Fresno State": "Fresno State University",
    "Western Carolina": "Western Carolina University",
    "Illinois-Chicago": "University of Illinois-Chicago",
    "Faulkner": "Faulkner University",
    "Col of Idaho": "College of Idaho",
    "Lenoir-Rhyne": "Lenoir–Rhyne University",
    "USC Aiken": "The University of South Carolina Aiken",
    "Kansas": "University of Kansas",
    "South Alabama": "University of South Alabama",
    "Central Oklahoma": "Central Oklahoma College",
    "Radford": "Radford University",
    "Coastal Carolina": "Coastal Carolina University",
    "Pepperdine": "Pepperdine University",
    "Fordham": "Fordham University",
    "Oakland": "Oakland University",
    "Samford": "Samford University",
    "Itawamba CC": "Itawamba Community College",
    "MIT": "The Massachusetts Institute of Technology",
    "Virginia Tech": "Virginia Tech",
    "Georgia": "University of Georgia",
    "Middle Tenn State": "Middle Tennessee State University",
    "Indiana": "Indiana University",
    "Mesa State College": "Colorado Mesa University",
    "Hope Intl": "Hope International University",
    "Texas-Arlington": "University of Texas at Arlington",
    "East Carolina": "East Carolina University",
    "St. Bonaventure": "St. Bonaventure University",
    "Evansville": "Evansville, Indiana",
    "Pomona-Pitzer": "Pomona-Pitzer Colleges",
    "Bryant University": "Bryant University",
    "Michigan State": "Michigan State University",
    "Memphis": "University of Memphis",
    "Lock Haven Pennsylvania": "Lock Haven University of Pennsylvania",
    "Cal State Bakersfield": "Cal State Bakersfield",
    "Lewis-Clark Idaho": "Lewis–Clark State College",
    "Niagara": "Niagara University",
    "Oklahoma": "University of Oklahoma",
    "Dartmouth": "Dartmouth College",
    "California": "University of California",
    "Kansas State": "Kansas State University",
    "Southern Illinois": "Southern Illinois University",
    "San Diego State": "San Diego State University",
    "Adams State": "Adams State University",
    "Sacramento State": "Sacramento State",
    "St. John's": "St. John's University",
    "Georgia Tech": "Georgia Tech",
    "Wichita State": "Wichita State University",
    "Stetson": "Stetson University",
    "Mississippi": "University of Mississippi",
    "Duke": "Duke University",
    "Florida Atlantic": "Florida Atlantic University",
    "UNC Wilmington": "The University of North Carolina Wilmington",
    "McLennan CC": "McLennan Community College",
    "West Virginia": "West Virginia University",
    "Le Moyne": "Le Moyne College",
    "Grand Canyon": "Grand Canyon University",
    "Illinois": "University of Illinois",
    "Illinois State": "Illinois State University",
    "Louisiana-Lafayette": "University of Louisiana at Lafayette",
    "Texas A&M-Corpus Christi": "Texas A&M University–Corpus Christi",
    "UNLV": "The University of Nevada, Las Vegas",
    "Maine": "University of Maine",
    "Towson": "Towson University",
    "Hofstra": "Hofstra University",
    "Wallace State CC": "Wallace State Community College",
    "Tulane": "Tulane University",
    "Crowder": "Crowder College",
    "Ohio State": "Ohio State University",
    "North Florida": "University of North Florida",
    "Tennessee Tech": "Tennessee Tech University",
    "Wright State": "Wright State University",
    "Wofford": "Wofford College",
    "John A. Logan": "John A. Logan College",
    "Mercyhurst": "Mercyhurst University",
    "Mercer": "Mercer University",
    "Troy": "Troy University",
    "Arizona State": "Arizona State University",
    "Whitworth": "Whitworth University",
    "UNC-Greensboro": "University of North Carolina Greensboro",
    "Southern Nevada": "Southern Nevada University",
    "Saint Louis": "Saint Louis University",
    "Pittsburgh": "Pittsburgh University",
    "New Orleans": "The University of New Orleans",
    "Grossmont": "Grossmont College",
    "Rutgers": "Rutgers University",
    "Nova Southeastern": "Nova Southeastern University",
    "Cal State Stanislaus": "California State University, Stanislaus",
    "Central Arkansas": "University of Central Arkansas",
    "Va Military Inst.": "Virginia Military Institute",
    "Northwestern State": "Northwestern State University",
    "Liberty": "Liberty University",
    "USC": "University of Southern California",
    "Santa Clara": "Santa Clara University",
    "Miami2": "Miami University - Oxford, Ohio",
    "Elon": "Elon University",
    "Cisco Junior College": "Cisco Junior College",
    "Canisius College": "Canisius College",
    "Bucknell": "Bucknell University",
    "George Mason": "George Mason University",
    "Saint Thomas": "St. Thomas University",
    "Seattle": "Seattle University",
    "Central Baptist": "Central Baptist College",
    "Lamar": "Lamar University",
    "Nebraska": "University of Nebraska",
    "Cincinnati": "University of Cincinnati",
    "Carson-Newman": "Carson-Newman University",
    "Saint Catharine": "Saint Catharine College",
    "Purdue": "Purdue University",
    "Western Kentucky": "Western Kentucky University",
    "Florida Southern": "Florida Southern College",
    "Baylor": "Baylor University",
    "San Jacinto": "San Jacinto College: Central Campus",
    "Ball State": "Ball State University",
    "Campbell": "Campbell University",
    "Lehigh": "Lehigh University",
    "Butler": "Butler University",
    "Creighton": "Creighton University",
    "Shepherd University": "Shepherd University",
    "Navy": "The United States Naval Academy",
    "Boston College": "Boston College",
    "UC Santa Barbara": "The University of California, Santa Barbara",
    "San Jose State": "San Jose State University",
    "SJ Delta College": "San Joaquin Delta College",
    "Nevada": "University of Nevada",
    "Florida Intl": "Florida International University",
    "Georgia State": "Georgia State University",
    "Wayne State University": "Wayne State University",
    "Penn State": "Penn State University",
    "Winthrop": "Winthrop University",
    "Brigham Young": "Brigham Young University",
    "Eastern Kentucky": "Eastern Kentucky University",
    "Texas-San Antonio": "The University of Texas at San Antonio",
    "James Madison": "James Madison University",
    "Northwestern": "Northwestern University",
    "College of Charleston": "College of Charleston",
    "George Washington": "The George Washington University",
    "Miami Dade": "Miami Dade College",
    "VCU": "Virginia Commonwealth University",
    "Indiana State": "Indiana State University",
    "Yale": "Yale University",
    "Ohio": "Ohio University",
    "Texas State": "Texas State University",
    "Cowley College": "Cowley College",
    "Fresno City": "Fresno City College",
    "Lincoln Memorial": "Lincoln Memorial University",
    "West Texas A&M": "West Texas A&M University",
    "Lubbock Christian": "Lubbock Christian University",
    "Miss Gulf Coast": "Mississippi Gulf Coast Community College",
    "Franklin Pierce": "Franklin Pierce University",
    "Antelope Valley College": "Antelope Valley College",
    "Harvard": "Harvard University",
    "Eastern Illinois": "Eastern Illinois University",
    "Washington State": "Washington State University",
    "Golden West": "Golden West College",
    "SE Missouri State": "Southeast Missouri State University",
    "Gardner-Webb": "Gardner-Webb University",
    "Purdue-NW": "Purdue University Northwest",
    "Bradley": "Bradley University",
    "Florida SW": "Florida SouthWestern State College",
    "Quincy": "Quincy University",
    "Hawaii": "University Of Hawaii",
    "Eastern OK ST": "Eastern Oklahoma State College",
    "Lee University": "Lee University",
    "Biola": "Biola University",
    "USC-Upstate": "University of South Carolina Upstate",
    "Florence-Darlington Tech": "Florence-Darlington Technical College",
    "Louisiana Tech": "Louisiana Tech University",
    "Angelo State": "Angelo State University",
    "McNeese State": "McNeese State University",
    "British Columbia": "The University of British Columbia",
    "Eastern Michigan": "Eastern Michigan University",
    "Rowan-Gloucester": "Rowan College South Jersey",
    "Azusa Pacific": "Azusa Pacific University",
    "San Francisco": "University of San Francisco",
    "Quinnipiac": "Quinnipiac University",
    "North Carolina Central": "North Carolina Central University",
    "Central Michigan": "Central Michigan University",
    "NW Florida State": "Northwest Florida State College",
    "Monmouth": "Monmouth University",
    "Notre Dame College": "Notre Dame College",
    "Belmont": "Belmont University",
    "Riverside CC": "Riverside Community College",
    "Cal Poly Pomona": "Cal Poly Pomona",
    "Davidson": "Davidson College",
    "Toledo": "University of Toledo",
    "Northeastern": "Northeastern University",
    "Youngstown State": "Youngstown State University",
    "Valparaiso": "Valparaiso University",
    "California Baptist": "CALIFORNIA BAPTIST UNIVERSITY",
    "Portland": "University of Portland",
    "Assumption College": "ASSUMPTION COLLEGE",
    "SIU-Edwardsville": "SOUTHERN ILLINOIS UNIVERSITY-EDWARDSVILLE",
    "Wingate": "Wingate University",
    "South Mountain CC": "SOUTH MOUNTAIN COMMUNITY COLLEGE",
    "Queens of NC": "QUEENS COLLEGE OF CHARLOTTE",
    "Westmont College": "Westmont College",
    "Miami": "University of Miami",
    "Alabama": "University of Alabama",
    "New York": "New York University",
}
# Expand the short school labels to full names using replaced_dict (values
# that are not keys of the dict are left as they are).
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on merged_df['School']: that form is a chained
# in-place call on a column selection, which pandas warns about and which does
# not update merged_df when Copy-on-Write is enabled.
merged_df['School'] = merged_df['School'].replace(replaced_dict)



In [39]:
# Preview the first five rows after the school names have been expanded.
merged_df.head(n=5)

Unnamed: 0,Player Name,School,Source,High Level,Year
0,Royce Lewis,"JSerra Catholic (San Juan Capistrano,CA)",HS,MLB,2017.0
1,Hunter Greene,"Notre Dame (Sherman Oaks,CA)",HS,MLB,2017.0
2,MacKenzie Gore,"Whiteville (Whiteville,NC)",HS,MLB,2017.0
3,Brendan McKay,University of Louisville,College,MLB,2017.0
4,Kyle Wright,Vanderbilt University,College,MLB,2017.0


In [51]:
# Build the final table as its own frame with a clean 0..n-1 index.
# The previous version aliased merged_df and called reset_index() and
# len(...unique()) without using their results, so neither call had any
# effect; reset_index(drop=True) here returns a new frame that is actually kept.
master_list = merged_df.reset_index(drop=True)
master_list.head()

Unnamed: 0,Player Name,School,Source,High Level,Year
0,Royce Lewis,"JSerra Catholic (San Juan Capistrano,CA)",HS,MLB,2017.0
1,Hunter Greene,"Notre Dame (Sherman Oaks,CA)",HS,MLB,2017.0
2,MacKenzie Gore,"Whiteville (Whiteville,NC)",HS,MLB,2017.0
3,Brendan McKay,University of Louisville,College,MLB,2017.0
4,Kyle Wright,Vanderbilt University,College,MLB,2017.0


In [52]:
# Write the final table to disk; index=False leaves the row labels out of
# the file.
master_list.to_csv(
    'data_unclean/Players.csv',
    index=False,
)