In [1]:
import psycopg2
from psycopg2 import OperationalError
import pandas as pd

In [2]:
def create_connection(db_name, db_user, db_password, db_host, db_port):
    """Open a psycopg2 connection to a PostgreSQL database.

    Args:
        db_name: Name of the database to connect to.
        db_user: Role used to authenticate.
        db_password: Password for ``db_user``.
        db_host: Host name or IP address of the server.
        db_port: Port the server listens on.

    Returns:
        The open connection, or ``None`` when psycopg2 raises
        ``OperationalError`` (the error is printed, not re-raised).
    """
    conn_params = {
        "database": db_name,
        "user": db_user,
        "password": db_password,
        "host": db_host,
        "port": db_port,
    }
    try:
        conn = psycopg2.connect(**conn_params)
    except OperationalError as exc:
        # Best-effort: report the failure and let the caller deal with None.
        print(f"The error '{exc}' occurred")
        return None
    print("Connection to PostgreSQL DB successful")
    return conn

In [3]:
# Open the connection that every later cell reuses (local server, empty password).
connection = create_connection(
    db_name="nicholasmontalbano",
    db_user="postgres",
    db_password="",
    db_host="127.0.0.1",
    db_port="5432",
)

Connection to PostgreSQL DB successful


In [4]:
# One cursor shared by all cells below. create_connection returns None when
# the connection attempt fails, in which case this line raises AttributeError.
cursor = connection.cursor()

In [22]:
# Recovery cell: after a statement fails, PostgreSQL rejects further commands
# until the open transaction is rolled back. Use the connection's own
# rollback() rather than sending a raw "ROLLBACK" through the cursor; no
# commit is needed afterwards because nothing is left pending.
connection.rollback()

In [13]:
# List every table that is not in PostgreSQL's own system schemas
# (pg_catalog, information_schema). The last expression displays the rows
# returned by fetchall() as the cell output.
sql_command = """SELECT *
FROM pg_catalog.pg_tables
WHERE schemaname != 'pg_catalog' AND 
    schemaname != 'information_schema';"""
cursor.execute(sql_command)
cursor.fetchall()

[('public', 'people_mlb', 'postgres', None, True, False, False, False),
 ('public', 'batting_mlb', 'postgres', None, False, False, False, False),
 ('public',
  'old_people_mlb',
  'nicholasmontalbano',
  None,
  False,
  False,
  False,
  False),
 ('public',
  'career_batting_mlb',
  'postgres',
  None,
  False,
  False,
  False,
  False),
 ('public', 'appearances_mlb', 'postgres', None, False, False, False, False)]

In [None]:
# Create the people table, keyed by playerID. Identifiers are unquoted, so
# PostgreSQL folds them to lowercase: the table is addressed as people_mlb in
# the load cell and shows up under that name in pg_tables.
# The statement is executed but not committed here; the following load cell
# calls connection.commit().
# NOTE(review): plain CREATE TABLE fails if the table already exists, so this
# cell cannot be re-run against a populated database without dropping it first.
sql_command = """CREATE TABLE people_MLB (
    playerID varchar PRIMARY KEY, 
    birthYear integer,
    birthMonth integer,
    birthDay integer,
    birthCountry varchar,
    birthState varchar,
    birthCity varchar,
    deathYear integer,
    deathMonth integer,
    deathDay integer, 
    deathCountry varchar,
    deathState varchar,
    deathCity varchar,
    nameFirst varchar,
    nameLast varchar,
    nameGiven varchar,
    weight integer,
    height integer,
    bats varchar,
    throws varchar,
    debut date, 
    finalGame date, 
    retroID varchar,
    bbrefID varchar
); """
cursor.execute(sql_command)

In [None]:
# Bulk-load People.csv into people_mlb with COPY, then commit.
# The context manager closes the file even if the copy fails.
with open('../data/baseballdatabank-master/core/People.csv', 'r') as f:
    # The header row supplies the column list. Strip surrounding whitespace so
    # the last name does not carry the line's trailing newline, and lowercase
    # the names: the table was created with unquoted identifiers (stored
    # lowercase) and psycopg2 >= 2.9 quotes the names it is given, which makes
    # them case-sensitive.
    cols = [name.strip().lower() for name in f.readline().split(',')]
    # The file position is now past the header, so only data rows are copied.
    # Empty fields are loaded as NULL.
    # NOTE(review): copy_from splits on the separator only; it does not
    # understand CSV quoting — confirm the file has no quoted fields.
    cursor.copy_from(f, 'people_mlb', ",", columns=cols, null="")
connection.commit()

In [None]:
# Create the batting table (addressed as batting_mlb elsewhere because
# unquoted identifiers fold to lowercase). No primary key is declared, and
# yearID is stored as varchar. Every counting column is an integer.
# The statement is executed but not committed here; the following load cell
# calls connection.commit().
# NOTE(review): plain CREATE TABLE fails if the table already exists, so this
# cell cannot be re-run against a populated database without dropping it first.
sql_command = """CREATE TABLE batting_MLB (
    playerID varchar,
    yearID varchar,
    stint integer,
    teamID varchar,
    lgID varchar,
    G integer,
    AB integer,
    R integer,
    H integer,
    DOUBLE integer, 
    TRIPLE integer,
    HR integer,
    RBI integer,
    SB integer,
    CS integer,
    BB integer,
    SO integer,
    IBB integer,
    HBP integer,
    SH integer,
    SF integer, 
    GIDP integer
); """
cursor.execute(sql_command)

In [None]:
# Bulk-load Batting.csv into batting_mlb with COPY, then commit.
# The context manager closes the file even if the copy fails.
with open('../data/baseballdatabank-master/core/Batting.csv', 'r') as f:
    # The header row supplies the column list. Names are stripped of the line
    # ending and lowercased: the table was created with unquoted identifiers
    # (stored lowercase) and psycopg2 >= 2.9 quotes the names it is given,
    # which makes them case-sensitive.
    cols = [name.strip().lower() for name in f.readline().split(',')]
    # The file position is now past the header, so only data rows are copied.
    # Empty fields are loaded as NULL. batting_mlb has no primary key, so
    # running this cell twice appends the rows twice.
    cursor.copy_from(f, 'batting_mlb', ",", columns=cols, null="")
connection.commit()

In [26]:
# Create the appearances table: identifying columns followed by integer
# G_* / GS game-count columns. No primary key is declared, and yearID is
# stored as varchar.
# The statement is executed but not committed here; the following load cell
# calls connection.commit().
# NOTE(review): plain CREATE TABLE fails if the table already exists, so this
# cell cannot be re-run against a populated database without dropping it first.
sql_command = """CREATE TABLE appearances_mlb (
    yearID varchar,
    teamID varchar,
    lgID varchar,
    playerID varchar,
    G_all integer,
    GS integer,
    G_batting integer,
    G_defense integer,
    G_p integer, 
    G_c integer,    
    G_1b integer,
    G_2b integer,
    G_3b integer,
    G_ss integer,
    G_lf integer,
    G_cf integer,
    G_rf integer,
    G_of integer,
    G_dh integer,
    G_ph integer,
    G_pr integer
); """
cursor.execute(sql_command)

In [27]:
# Bulk-load appearances.csv into appearances_mlb with COPY, then commit.
# The context manager closes the file even if the copy fails.
# NOTE(review): the file name is lowercase here while the other loads use
# capitalised names (People.csv, Batting.csv) — confirm the name on disk if
# this runs on a case-sensitive filesystem.
with open('../data/baseballdatabank-master/core/appearances.csv', 'r') as f:
    # The header row supplies the column list. Names are stripped of the line
    # ending and lowercased: the table was created with unquoted identifiers
    # (stored lowercase) and psycopg2 >= 2.9 quotes the names it is given,
    # which makes them case-sensitive.
    cols = [name.strip().lower() for name in f.readline().split(',')]
    # The file position is now past the header, so only data rows are copied.
    # Empty fields are loaded as NULL. appearances_mlb has no primary key, so
    # running this cell twice appends the rows twice.
    cursor.copy_from(f, 'appearances_mlb', ",", columns=cols, null="")
connection.commit()

In [None]:
# Create the career-level table: playerID, then the batting counting columns
# (AB .. GIDP), then the appearance counting columns (G_all .. G_pr). The
# column order matters because the INSERT cell that fills this table supplies
# its values positionally.
# The statement is executed but not committed in this cell.
# NOTE(review): plain CREATE TABLE fails if the table already exists, so this
# cell cannot be re-run against a populated database without dropping it first.
sql_command = """CREATE TABLE career_batting_mlb (
    playerID varchar,
    AB integer,
    R integer,
    H integer,
    DOUBLE integer, 
    TRIPLE integer,
    HR integer,
    RBI integer,
    SB integer,
    CS integer,
    BB integer,
    SO integer,
    IBB integer,
    HBP integer,
    SH integer,
    SF integer, 
    GIDP integer,
    G_all integer,
    GS integer,
    G_batting integer,
    G_defense integer,
    G_p integer, 
    G_c integer,    
    G_1b integer,
    G_2b integer,
    G_3b integer,
    G_ss integer,
    G_lf integer,
    G_cf integer,
    G_rf integer,
    G_of integer,
    G_dh integer,
    G_ph integer,
    G_pr integer
); """
cursor.execute(sql_command)

In [None]:
# Fill career_batting_mlb with per-player career totals.
#   a: batting_mlb summed per playerID (AB .. GIDP).
#   b: appearances_mlb summed per playerID (G_all .. G_pr).
# With a USING(playerID) join, SELECT * yields playerID once, then the
# remaining columns of a, then those of b. No target column list is given, so
# the values are assigned to the table's columns by position.
# LEFT JOIN keeps players that have batting rows but no appearances rows;
# their appearance totals are NULL.
# The statement is executed but not committed in this cell, and running it
# again appends a second copy of every row.
sql_command = """INSERT INTO career_batting_mlb
SELECT * FROM
(SELECT playerID, 
SUM(AB), SUM(R), SUM(H), SUM(DOUBLE), SUM(TRIPLE), SUM(HR), SUM(RBI), SUM(SB), SUM(CS), SUM(BB), 
SUM(SO), SUM(IBB), SUM(HBP), SUM(SH), SUM(SF), SUM(GIDP)
FROM batting_mlb
GROUP BY playerID) a

LEFT JOIN (
SELECT playerID, 
SUM (G_all), SUM(GS), SUM(G_batting), SUM(G_defense), SUM(G_p), SUM(G_c), SUM(G_1b), SUM(G_2b), 
SUM(G_3b), SUM(G_ss), SUM(G_lf), SUM(G_cf), SUM(G_rf), SUM(G_of), SUM(G_dh), SUM(G_ph), SUM(G_pr)
FROM appearances_mlb
GROUP BY playerID) b

USING(playerID)
"""
cursor.execute(sql_command)

In [None]:
# Add the PP column that the next cell fills in. IF NOT EXISTS (PostgreSQL
# 9.6+) makes the cell safe to re-run: without it a second run raises
# "column already exists" and leaves the transaction aborted.
# The statement is executed but not committed in this cell.
sql_command = """
ALTER TABLE career_batting_mlb
ADD COLUMN IF NOT EXISTS PP varchar
"""
cursor.execute(sql_command)

In [None]:
# Label each player with the position (among c, 1b, 2b, 3b, ss, of, dh) at
# which they have the highest career game total.
#   - greatest() ignores NULL arguments and is NULL only when all are NULL.
#   - NULLIF(..., 0) turns a maximum of 0 into NULL, so players with no games
#     at any listed position fall through to 'NONE'. Without it, a maximum of
#     0 equals G_c = 0 and such players would be labelled 'catcher'.
#   - On ties the first matching WHEN wins, in the order listed.
# The statement is executed but not committed in this cell.
sql_command = """
UPDATE career_batting_mlb
SET PP =
CASE NULLIF(greatest(G_c, G_1b, G_2b, G_3b, G_ss, G_of, G_dh), 0)
    WHEN G_c THEN 'catcher'
    WHEN G_1b THEN '1b'
    WHEN G_2b THEN '2b'
    WHEN G_3b THEN '3b'
    WHEN G_ss THEN 'ss'
    WHEN G_of THEN 'of'
    WHEN G_dh THEN 'dh'
    ELSE 'NONE'
END;
"""
cursor.execute(sql_command)

In [24]:
# Run a full-table SELECT on appearances_mlb. Nothing is fetched in this cell,
# so no rows are displayed.
# NOTE(review): looks like a leftover check that the table exists — confirm
# whether it is still needed, or fetch a limited preview instead.
sql_command = "SELECT * FROM appearances_mlb"
cursor.execute(sql_command)

In [17]:
# Commit whatever is still pending on the connection. The CREATE / INSERT /
# ALTER / UPDATE cells for career_batting_mlb only call cursor.execute, so
# their changes are not made permanent until a commit such as this one runs.
connection.commit()