#### ETL

Install psycopg2 to run SQL against PostgreSQL from Python:
#!pip install psycopg2

In [1]:
pip show psycopg2

Name: psycopg2
Version: 2.9.9
Summary: psycopg2 - Python-PostgreSQL Database Adapter
Home-page: https://psycopg.org/
Author: Federico Di Gregorio
Author-email: fog@initd.org
License: LGPL with exceptions
Location: /Users/Manpreet/anaconda3/lib/python3.10/site-packages
Requires: 
Required-by: 
Note: you may need to restart the kernel to use updated packages.


To open a connection, provide:
- username
- password
- host
- database name

Then create a cursor to run the SQL commands.

In [9]:
import psycopg2

def execute_sql(query, params=None):
    """
    Execute a SQL statement and return its rows.

    Opens a fresh connection for each call, runs the statement, commits on
    success and always closes the connection.

    Parameters:
        query:  SQL text, optionally containing %s placeholders.
        params: Optional sequence (or mapping) of values bound to the
                placeholders by psycopg2; None when the query has none.

    Returns:
        A list of row tuples for statements that produce a result set,
        an empty list for statements that do not (INSERT/UPDATE/DDL without
        RETURNING), or None if the database reported an error.
    """
    # NOTE(review): credentials are hard-coded; consider reading them from the
    # environment or a secrets store before sharing this notebook.
    conn = psycopg2.connect(database="fifa19", user="postgres", password="123456", host="localhost")

    try:
        with conn.cursor() as cur:
            # Passing params=None is the same as omitting it.
            cur.execute(query, params)

            # description is None when the statement returned no result set;
            # calling fetchall() in that case raises ProgrammingError.
            rows = cur.fetchall() if cur.description is not None else []

        # Persist any changes made by the statement; harmless for SELECT.
        conn.commit()
        return rows

    except psycopg2.Error as e:
        print("Error:", e)
        conn.rollback()  # Rollback the transaction if an error occurs
        return None

    finally:
        conn.close()  # Close the connection

In [12]:
# Preview the first five rows of the players table
players = execute_sql("SELECT * FROM players limit 5")
# execute_sql returns None on a database error; iterate over nothing in that case
for row in players or []:
    print(row)

(158023, 'L Messi', 31, 'argentina', 94, 94, 'FC Barcelona', 110500000, 565000, 'Left', 10, datetime.date(2004, 7, 1), "5'7", 159, 75)
(20801, 'Cristiano Ronaldo', 33, 'Portugal', 94, 94, 'Juventus', 77000000, 405000, 'Right', 7, datetime.date(2018, 7, 10), "6'2", 183, 85)
(190871, 'Neymar Jr', 26, 'Brazil', 92, 93, 'Paris Saint-Germain', 118500000, 290000, 'Right', 10, datetime.date(2017, 8, 3), "5'9", 150, 81)
(193080, 'De Gea', 27, 'Spain', 91, 93, 'Manchester United', 72000000, 260000, 'Right', 1, datetime.date(2011, 7, 1), "6'4", 168, 40)
(192985, 'K De Bruyne', 27, 'Belgium', 91, 92, 'Manchester City', 102000000, 355000, 'Right', 7, datetime.date(2015, 8, 30), "5'11", 154, 79)


In [13]:
# List the names of all tables in the public schema
table = execute_sql(
    "SELECT table_name FROM information_schema.tables WHERE table_schema='public'"
)
# A None result means the query failed, so there is nothing to print
for row in (table if table is not None else ()):
    print(row)

('persons',)
('players',)
('email',)
('email1',)


#### Create table

In [18]:
# Connect to the database and create cursor
conn = psycopg2.connect(database="fifa19", user="postgres", password="123456", host="localhost")
cur = conn.cursor()

try:
    # Define the SQL statement for creating the table.
    # IF NOT EXISTS keeps this cell re-runnable (Restart Kernel -> Run All)
    # instead of failing once the table has been created.
    create_table_query = '''
        CREATE TABLE IF NOT EXISTS customers (
            first_name VARCHAR(100),
            last_name VARCHAR(100),
            dob DATE,
            email TEXT
        )
    '''

    # Execute the SQL statement to create the table
    cur.execute(create_table_query)

    # Commit the transaction
    conn.commit()

except psycopg2.Error as e:
    print("Error creating table:", e)
    conn.rollback()

finally:
    # Always release the cursor and connection, even on unexpected errors
    cur.close()
    conn.close()


### Insert data

In [19]:
# Connect to the database and create cursor
conn = psycopg2.connect(database="fifa19", user="postgres", password="123456", host="localhost")
cur = conn.cursor()

try:
    # Define the SQL statement for inserting one row; the values are bound
    # through placeholders rather than written into the SQL text.
    insert_data_query = '''
        INSERT INTO customers (first_name, last_name, dob, email)
        VALUES (%s, %s, %s, %s);
    '''

    # ISO 8601 date (YYYY-MM-DD) so the value does not depend on the
    # server's DateStyle setting (day/month order).
    customer = ('Henry', 'Lewis', '2012-09-08', 'henry.lewis@gmail.com')

    # Execute the SQL statement to insert the row
    cur.execute(insert_data_query, customer)

    # Commit the transaction
    conn.commit()

except psycopg2.Error as e:
    print("Error inserting data:", e)
    conn.rollback()

finally:
    # Always release the cursor and connection, even on unexpected errors
    cur.close()
    conn.close()

In [20]:
# Show every row currently stored in customers
cust = execute_sql("Select * from customers")
# None signals a failed query; treat it as "no rows to print"
for row in cust or []:
    print(row)


('Henry', 'Lewis', datetime.date(2012, 9, 8), 'henry.lewis@gmail.com')


### Insert data from csv

In [24]:
import csv
from datetime import datetime 

# Connect to the database and create cursor
conn = psycopg2.connect(database="fifa19", user="postgres", password="123456", host="localhost")
cur = conn.cursor()

try:
    # NOTE(review): absolute, machine-specific path; consider a configurable data directory.
    # newline='' lets the csv module handle line endings inside quoted fields itself.
    with open('/Users/Manpreet/Documents/DataAnalysis/SQL/email.csv', 'r', newline='') as file:
        reader = csv.reader(file)
        next(reader, None)  # Skip the header row if exists (tolerates an empty file)
        for row in reader:
            # Blank or truncated lines would otherwise raise IndexError below
            if len(row) < 4:
                print("Skipping incomplete row:", row)
                continue

            # Parse date and handle invalid formats
            try:
                dob = datetime.strptime(row[2], '%Y-%m-%d').date()
            except ValueError:
                print("Invalid date format in row:", row)
                continue

            # A failed statement aborts the whole PostgreSQL transaction, so each
            # insert runs under a savepoint: a bad row is undone on its own and
            # the rows inserted before and after it are still committed.
            cur.execute("SAVEPOINT csv_row")
            try:
                cur.execute(
                    "INSERT INTO customers (first_name, last_name, dob, email) VALUES (%s, %s, %s, %s)",
                    (row[0], row[1], dob, row[3])
                )
            except psycopg2.Error as e:
                print("Error inserting data:", e)
                cur.execute("ROLLBACK TO SAVEPOINT csv_row")
                continue
            cur.execute("RELEASE SAVEPOINT csv_row")

    # Commit the transaction
    conn.commit()

finally:
    # Always release the cursor and connection; closing without a commit
    # discards any uncommitted work if an unexpected error occurred.
    cur.close()
    conn.close()

In [25]:
# Re-read customers to confirm the CSV rows were stored
cust = execute_sql("Select * from customers")
# Fall back to an empty list when the query failed and returned None
for row in ([] if cust is None else cust):
    print(row)


('Henry', 'Lewis', datetime.date(2012, 9, 8), 'henry.lewis@gmail.com')
('David', 'farari', datetime.date(1989, 2, 12), 'David.farari@gmail.com')
('kisrten', 'christian', datetime.date(1980, 3, 13), 'kisrten.christian@gmail.com')
('ronald', 'trump', datetime.date(1970, 7, 24), 'ronald.trump@gmail.com')
('Ashley', 'quigg', datetime.date(1999, 6, 27), 'Ashley.quigg@gmail.com')
('megan', 'morrison', datetime.date(1979, 4, 12), 'megan.morrison@gmail.com')


### Insert multiple rows 

In [26]:
# Rows to bulk-insert: (first name, last name, date string, email address)
data = [
    ('Devid', 'morrison', '12-12-2002', 'abc.def@gmail.com'),
    ('Peter', 'quigg', '12-12-2002', 'peter@gmail.com'),
    ('Aliff', 'blasphet', '12-12-2002', 'aliff@gmail.com'),
    ('Mukesh', 'Sharma', '12-12-2002', 'mukesh.sharma@gmail.com'),
]

In [27]:
# Parameterized INSERT passed to cursor.executemany(); one %s placeholder per tuple field.
# NOTE(review): this writes to the `email` table and names no columns, while the
# verification cell for this section reads from `customers` — confirm the intended
# target table and that its column order matches the tuples in `data`.
query = "insert into email values (%s, %s, %s, %s)"

In [29]:
# Connect to the database and create cursor
conn = psycopg2.connect(database="fifa19", user="postgres", password="123456", host="localhost")
cur = conn.cursor()

try:
    # Insert multiple rows: the statement in `query` is executed once per tuple in `data`
    cur.executemany(query, data)

    # Commit the transaction
    conn.commit()

except psycopg2.Error as e:
    # Report and undo the batch, matching the error handling of the other insert cells
    print("Error inserting data:", e)
    conn.rollback()

finally:
    # Always release the cursor and connection, even on unexpected errors
    cur.close()
    conn.close()

In [30]:
# Print the current contents of the customers table
cust = execute_sql("Select * from customers")
# execute_sql yields None on failure; substitute an empty tuple so the loop is a no-op
for row in (cust if cust is not None else ()):
    print(row)

('Henry', 'Lewis', datetime.date(2012, 9, 8), 'henry.lewis@gmail.com')
('David', 'farari', datetime.date(1989, 2, 12), 'David.farari@gmail.com')
('kisrten', 'christian', datetime.date(1980, 3, 13), 'kisrten.christian@gmail.com')
('ronald', 'trump', datetime.date(1970, 7, 24), 'ronald.trump@gmail.com')
('Ashley', 'quigg', datetime.date(1999, 6, 27), 'Ashley.quigg@gmail.com')
('megan', 'morrison', datetime.date(1979, 4, 12), 'megan.morrison@gmail.com')
