# Работа с базами данных

## Postgres

### Установка psycopg2

In [1]:
%pip install psycopg2

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 23.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


### Подключение к базе данных

In [4]:
from psycopg2 import OperationalError
import psycopg2

def create_connection(db_name:str, db_user:str, db_password:str, db_host:str, db_port:int):
    """Open a psycopg2 connection to a PostgreSQL database.

    A success message is printed once the connection is established. If
    ``psycopg2.connect`` raises ``OperationalError``, the error is printed
    instead of being re-raised.

    Args:
        db_name: Name of the database to connect to.
        db_user: User name used to authenticate.
        db_password: Password used to authenticate.
        db_host: Host of the database server.
        db_port: Port of the database server.

    Returns:
        The connection returned by ``psycopg2.connect``, or ``None`` when an
        ``OperationalError`` occurred.
    """
    connect_kwargs = {
        "database": db_name,
        "user": db_user,
        "password": db_password,
        "host": db_host,
        "port": db_port,
    }
    try:
        conn = psycopg2.connect(**connect_kwargs)
    except OperationalError as exc:
        print(f"The error '{exc}' occurred")
        return None
    print("Connection to PostgreSQL DB successful")
    return conn

In [5]:
# Connect to the "demo" database on 127.0.0.1:5432 as user "postgres".
# NOTE(review): the credentials are hard-coded in the notebook; consider
# reading them from environment variables instead.
connection = create_connection( "demo", "postgres", "password", "127.0.0.1", "5432" )

Connection to PostgreSQL DB successful


### Создание таблиц

In [8]:
def execute_query(connection, query):
    """Execute a single SQL statement whose result rows are not needed.

    The connection is switched to autocommit mode before the statement runs.
    A success message is printed when the statement completes. An
    ``OperationalError`` is printed instead of being re-raised; any other
    exception propagates to the caller.

    Args:
        connection: An open database connection providing ``cursor()`` and
            an ``autocommit`` attribute.
        query: The SQL text to execute.

    Returns:
        None.
    """
    connection.autocommit = True
    cursor = connection.cursor()
    try:
        cursor.execute(query)
        print("Query executed successfully")
    except OperationalError as e:
        print(f"The error '{e}' occurred")
    finally:
        # Close the cursor on every path, including exceptions other than
        # OperationalError, which previously left it open.
        cursor.close()

In [9]:
# DDL for the "users" table: a serial primary key plus name, age, gender and
# nationality columns. IF NOT EXISTS keeps the statement safe to re-run.
create_users_table = """
CREATE TABLE IF NOT EXISTS users (
  id SERIAL PRIMARY KEY,
  name TEXT NOT NULL, 
  age INTEGER,
  gender TEXT,
  nationality TEXT
)
"""

In [11]:
# Run the CREATE TABLE statement on the connection opened earlier.
execute_query(connection, create_users_table)
connection.close()    # close the connection

Query executed successfully


### Вставка данных

In [14]:
# Rows to insert, in column order: (name, age, gender, nationality).
users = [
    ("James", 25, "male", "USA"),
    ("Leila", 32, "female", "France"),
    ("Brigitte", 35, "female", "England"),
    ("Mike", 40, "male", "Denmark"),
    ("Elizabeth", 21, "female", "Canada"),
]

# One "%s" placeholder per row: psycopg2 adapts each Python tuple to a
# parenthesised SQL value list, so a single INSERT carries every row.
user_records = ", ".join(["%s"] * len(users))

insert_query = f"INSERT INTO users (name, age, gender, nationality) VALUES {user_records}"

connection = create_connection( "demo", "postgres", "password", "127.0.0.1", "5432" )
connection.autocommit = True
cursor = connection.cursor()
try:
    cursor.execute(insert_query, users)
finally:
    # Release the cursor and the connection even if the INSERT fails.
    cursor.close()
    connection.close()

Connection to PostgreSQL DB successful


### Множественная вставка строк

In [21]:
# Rows to insert, in column order: (name, age, gender, nationality).
users = [
    ("James", 25, "male", "USA"),
    ("Leila", 32, "female", "France"),
    ("Brigitte", 35, "female", "England"),
    ("Mike", 40, "male", "Denmark"),
    ("Elizabeth", 21, "female", "Canada"),
]

# A single-row INSERT template; executemany() runs it once per tuple in `users`.
insert_query = "INSERT INTO users (name, age, gender, nationality) VALUES (%s,%s,%s,%s);"

connection = create_connection( "demo", "postgres", "password", "127.0.0.1", "5432" )
connection.autocommit = True
cursor = connection.cursor()
try:
    cursor.executemany(insert_query, users)
finally:
    # Release the cursor and the connection even if an INSERT fails.
    cursor.close()
    connection.close()

Connection to PostgreSQL DB successful


### Извлечение данных из записей

In [22]:
def execute_read_query(connection, query):
    """Execute a SQL query and return all of its result rows.

    An ``OperationalError`` is printed instead of being re-raised, and
    ``None`` is returned in that case; any other exception propagates to
    the caller.

    Args:
        connection: An open database connection providing ``cursor()``.
        query: The SQL text to execute.

    Returns:
        The value of ``cursor.fetchall()`` on success, or ``None`` when an
        ``OperationalError`` occurred.
    """
    cursor = connection.cursor()
    try:
        cursor.execute(query)
        return cursor.fetchall()
    except OperationalError as e:
        print(f"The error '{e}' occurred")
        return None
    finally:
        # Close the cursor on every path, including exceptions other than
        # OperationalError, which previously left it open.
        cursor.close()

select_users = "SELECT * FROM users"

connection = create_connection( "demo", "postgres", "password", "127.0.0.1", "5432" )
users = execute_read_query(connection, select_users)
connection.close()

# execute_read_query returns None when the query fails with an
# OperationalError; fall back to an empty sequence so the loop does not
# raise TypeError in that case.
for user in users or []:
    print(user)

Connection to PostgreSQL DB successful
(11, 'James', 25, 'male', 'USA')
(12, 'Leila', 32, 'female', 'France')
(13, 'Brigitte', 35, 'female', 'England')
(14, 'Mike', 40, 'male', 'Denmark')
(15, 'Elizabeth', 21, 'female', 'Canada')


### Обновление записей в таблице

In [23]:
# Set nationality to 'russian' for the row whose id is 2.
# NOTE(review): the SELECT output recorded earlier in this notebook lists
# ids 11-15, so id = 2 may not match any row - confirm the intended id.
update_user_nationality = """
UPDATE
  users
SET
  nationality = 'russian'
WHERE
  id = 2
"""

# Open a fresh connection, run the UPDATE, then close the connection.
connection = create_connection( "demo", "postgres", "password", "127.0.0.1", "5432" )
execute_query(connection,  update_user_nationality)
connection.close()

Connection to PostgreSQL DB successful
Query executed successfully


### Удаление записей в таблице

In [24]:
# Delete the row whose id is 5.
# NOTE(review): the SELECT output recorded earlier in this notebook lists
# ids 11-15, so id = 5 may not match any row - confirm the intended id.
delete_user = "DELETE FROM users WHERE id = 5"

# Open a fresh connection, run the DELETE, then close the connection.
connection = create_connection( "demo", "postgres", "password", "127.0.0.1", "5432" )
execute_query(connection, delete_user)
connection.close()

Connection to PostgreSQL DB successful
Query executed successfully


### Pandas и Psycopg2

In [25]:
import pandas as pd

In [26]:
# Sum of ticket_flights.amount grouped by fare_conditions, computed over
# tickets LEFT JOIN ticket_flights on ticket_no.
query = '''
select fare_conditions , sum(tf.amount) from tickets t left join ticket_flights tf on tf.ticket_no = t.ticket_no group by fare_conditions ;
'''

In [27]:
# Load the query result into a DataFrame through a raw psycopg2 connection.
# NOTE(review): the recorded output echoes the read_sql line, which looks like
# the tail of the warning pandas emits for DBAPI connections other than
# sqlite3 - confirm; pandas documents SQLAlchemy connectables for `con`.
connection = create_connection( "demo", "postgres", "password", "127.0.0.1", "5432" )
data_frame = pd.read_sql(query,connection)
connection.close()

Connection to PostgreSQL DB successful


  data_frame = pd.read_sql(query,connection)


In [28]:
data_frame

Unnamed: 0,fare_conditions,sum
0,Business,5505180000.0
1,Comfort,566116900.0
2,Economy,14695680000.0


### Pandas и Sqlalchemy

In [29]:
%pip install sqlalchemy

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 23.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [31]:
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.sql import text

# Sum of ticket_flights.amount grouped by fare_conditions.
sql_query = '''
select fare_conditions , sum(tf.amount) from tickets t left join ticket_flights tf on tf.ticket_no = t.ticket_no group by fare_conditions ;
'''

# Database URL layout: dialect+driver://username:password@host:port/database
engine = create_engine("postgresql+psycopg2://postgres:password@localhost:5432/demo")

# The connection is released automatically when the with-block exits.
with engine.connect() as db_conn:
    data_frame = pd.read_sql(text(sql_query), db_conn)
data_frame

Unnamed: 0,fare_conditions,sum
0,Business,5505180000.0
1,Comfort,566116900.0
2,Economy,14695680000.0


## Clickhouse

In [1]:
%pip install clickhouse-connect

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.2.1 -> 23.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


### Подключение к базе данных

In [2]:
import clickhouse_connect

# Create a ClickHouse client for localhost, logging in as user "default"
# with an empty password.
client = clickhouse_connect.get_client(host='localhost', username='default', password='')

### Создание таблиц

In [3]:
# IF NOT EXISTS keeps this cell re-runnable: without it, a second run fails
# because new_table already exists.
client.command('CREATE TABLE IF NOT EXISTS new_table (key UInt32, value String, metric Float64) ENGINE MergeTree ORDER BY key')

<clickhouse_connect.driver.summary.QuerySummary at 0x2035aed3790>

### Вставка данных

In [4]:
# Two sample rows, each ordered as (key, value, metric).
row1 = [1000, 'String Value 1000', 5.233]
row2 = [2000, 'String Value 2000', -107.04]
data = [row1, row2]
# column_names maps each position in a row to its target column.
# NOTE(review): re-running this cell inserts the same rows again - confirm
# that duplicates are acceptable.
client.insert('new_table', data, column_names=['key', 'value', 'metric'])

<clickhouse_connect.driver.summary.QuerySummary at 0x2035b2b8c90>

### Извлечение данных из таблиц

In [5]:
# Aggregate over new_table: the largest key and the average metric.
result = client.query('SELECT max(key), avg(metric) FROM new_table')
# The last expression is shown as the cell output (a list of row tuples in
# the recorded run).
result.result_rows

[(2000, -50.9035)]

### Pandas и clickhouse

In [8]:
# Stream the contents of new_table; each item yielded by the stream is
# printed (a pandas DataFrame in the recorded output).
df_stream = client.query_df_stream('SELECT * FROM new_table')
# NOTE(review): column_names is not used in the visible cells - confirm it
# is still needed.
column_names = df_stream.source.column_names
# The stream is consumed inside its own context manager.
with df_stream:
    for df in df_stream:
        print(df)

    key              value   metric
0  1000  String Value 1000    5.233
1  2000  String Value 2000 -107.040
