In [6]:
from clickhouse_connect import get_client
import pandas as pd

client = get_client(host='localhost', username='default', password='123')

print("Connected to ClickHouse server successfully!")
print(client.query('SELECT version()').result_rows)


Connected to ClickHouse server successfully!
[('25.10.1.2351',)]


In [7]:
client.command("CREATE DATABASE IF NOT EXISTS university_demo")
client.command("USE university_demo")
print("Database 'university_demo' created and selected.")

Database 'university_demo' created and selected.


In [8]:

client.command("""
CREATE TABLE IF NOT EXISTS university_demo.students (
    student_id UUID DEFAULT generateUUIDv4(),
    name String,
    age UInt8,
    country String
) ENGINE = MergeTree()
ORDER BY (name);
""")

# Courses table
client.command("""
CREATE TABLE IF NOT EXISTS university_demo.courses (
    course_id UUID DEFAULT generateUUIDv4(),
    title String,
    department String
) ENGINE = MergeTree()
ORDER BY (department, title);
""")

# Enrollments table (joins students to courses)
client.command("""
CREATE TABLE IF NOT EXISTS university_demo.enrollments (
    enroll_id UUID DEFAULT generateUUIDv4(),
    student_id UUID,
    course_id UUID,
    grade Float32
) ENGINE = MergeTree()
ORDER BY (student_id, course_id);
""")

print("Created tables: students, courses, enrollments.")

Created tables: students, courses, enrollments.


In [10]:
client.command("SELECT * FROM university_demo.students")

['437f9531-4267-413f-90a2-744d85a752c4',
 'Alice',
 '22',
 'UK\n5a5f051b-3f94-4289-8b89-2c008911bc18',
 'Alice',
 '22',
 'UK\nab9f1922-3a54-4eb5-91b1-7d62144fd046',
 'Bob',
 '24',
 'USA\n408395eb-a956-4fd5-8a90-ac6af7f3c7aa',
 'Bob',
 '24',
 'USA\n3e3649dc-48d3-4e0a-ac46-462e3bbba79c',
 'Carlos',
 '23',
 'Spain\n5d482139-2cea-4c72-a2c9-711c3e04a0e3',
 'Carlos',
 '23',
 'Spain\n5d7e9497-e9dd-4848-9b12-8343023d5a1d',
 'Diana',
 '21',
 'Germany\nb713dcc5-3196-4f13-8f83-156fd70c215c',
 'Diana',
 '21',
 'Germany']

In [11]:
students = [
    ("Ahsan", 22, "UAE"),
    ("Bilal", 24, "KSA"),
    ("Chaudhary", 23, "India"),
    ("Daaim", 21, "Pakistan"),
]
client.insert("university_demo.students", students, column_names=["name", "age", "country"])
print("Inserted student data.")

Inserted student data.


In [13]:
client.query_df("SELECT * FROM university_demo.students")

Unnamed: 0,student_id,name,age,country
0,437f9531-4267-413f-90a2-744d85a752c4,Alice,22,UK
1,5a5f051b-3f94-4289-8b89-2c008911bc18,Alice,22,UK
2,ab9f1922-3a54-4eb5-91b1-7d62144fd046,Bob,24,USA
3,408395eb-a956-4fd5-8a90-ac6af7f3c7aa,Bob,24,USA
4,3e3649dc-48d3-4e0a-ac46-462e3bbba79c,Carlos,23,Spain
5,5d482139-2cea-4c72-a2c9-711c3e04a0e3,Carlos,23,Spain
6,5d7e9497-e9dd-4848-9b12-8343023d5a1d,Diana,21,Germany
7,b713dcc5-3196-4f13-8f83-156fd70c215c,Diana,21,Germany
8,ba73bacd-145b-42c1-92b5-ef697e0e1c7c,Ahsan,22,UAE
9,a26984c1-41c5-438c-bab9-8c13aac0eaa7,Bilal,24,KSA


In [12]:
import pandas as pd

x = client.command("""SELECT * FROM university_demo.students
""")

df = pd.DataFrame(x)

df

Unnamed: 0,0
0,ba73bacd-145b-42c1-92b5-ef697e0e1c7c
1,Ahsan
2,22
3,UAE\na26984c1-41c5-438c-bab9-8c13aac0eaa7
4,Bilal
5,24
6,KSA\nd72277f2-93fb-4afc-9c82-2f27633b7890
7,Chaudhary
8,23
9,India\n60aa31b2-70b6-4393-aaa5-8691366c898e


In [4]:
# ============================================
# 4. Insert data
# ============================================

# Insert students


# Insert courses
courses = [
    ("Databases", "Computer Science"),
    ("Machine Learning", "Computer Science"),
    ("Econometrics", "Economics"),
    ("Marketing", "Business"),
]
client.insert("university_demo.courses", courses, column_names=["title", "department"])
print("üìö Inserted course data.")

# Fetch the generated UUIDs for joining
students_df = client.query_df("SELECT * FROM university_demo.students")
courses_df = client.query_df("SELECT * FROM university_demo.courses")

print("üéì Students Table:")
print(students_df)
print("üìò Courses Table:")
print(courses_df)

# Map some enrollments
enrollments = [
    (students_df.loc[0, "student_id"], courses_df.loc[0, "course_id"], 95.0),
    (students_df.loc[1, "student_id"], courses_df.loc[1, "course_id"], 88.0),
    (students_df.loc[2, "student_id"], courses_df.loc[0, "course_id"], 92.5),
    (students_df.loc[3, "student_id"], courses_df.loc[2, "course_id"], 76.5),
]
client.insert("university_demo.enrollments", enrollments, column_names=["student_id", "course_id", "grade"])
print("üßæ Inserted enrollments data.")

üë©‚Äçüéì Inserted student data.
üìö Inserted course data.
üéì Students Table:
                             student_id    name  age  country
0  437f9531-4267-413f-90a2-744d85a752c4   Alice   22       UK
1  ab9f1922-3a54-4eb5-91b1-7d62144fd046     Bob   24      USA
2  3e3649dc-48d3-4e0a-ac46-462e3bbba79c  Carlos   23    Spain
3  5d7e9497-e9dd-4848-9b12-8343023d5a1d   Diana   21  Germany
üìò Courses Table:
                              course_id             title        department
0  f14c6518-7345-4c06-9b59-ed767fb13577         Marketing          Business
1  28a8a896-a274-4fe6-bef1-851f9b93adfb         Databases  Computer Science
2  5bf4800d-8e2d-472b-bc0c-e04a4ea2b090  Machine Learning  Computer Science
3  e4e82a5f-b9cc-492e-b891-0d3fd8e01949      Econometrics         Economics
üßæ Inserted enrollments data.


In [5]:
# ============================================
# 5. Query examples
# ============================================

# Select all students
print("\nüëÄ All Students:")
print(client.query_df("SELECT name, age, country FROM university_demo.students"))

# Simple join between students and enrollments
print("\nüîó Join Example (Students + Enrollments):")
join_df = client.query_df("""
SELECT s.name, s.country, c.title, e.grade
FROM university_demo.enrollments e
JOIN university_demo.students s ON e.student_id = s.student_id
JOIN university_demo.courses c ON e.course_id = c.course_id
ORDER BY s.name
""")
print(join_df)


üëÄ All Students:
     name  age  country
0   Alice   22       UK
1     Bob   24      USA
2  Carlos   23    Spain
3   Diana   21  Germany

üîó Join Example (Students + Enrollments):
     name  country             title  grade
0   Alice       UK         Marketing   95.0
1     Bob      USA         Databases   88.0
2  Carlos    Spain         Marketing   92.5
3   Diana  Germany  Machine Learning   76.5


In [6]:
# Aggregation: Average grade by country
print("\nüìä Average Grade by Country:")
avg_grade_df = client.query_df("""
SELECT s.country, avg(e.grade) AS avg_grade
FROM university_demo.enrollments e
JOIN university_demo.students s ON e.student_id = s.student_id
GROUP BY s.country
ORDER BY avg_grade DESC
""")
print(avg_grade_df)


üìä Average Grade by Country:
   country  avg_grade
0       UK       95.0
1    Spain       92.5
2      USA       88.0
3  Germany       76.5


In [7]:
# ============================================
# 6. UUID Example
# ============================================
print("\nüÜî Example of UUID usage:")
uuid_example = client.query_df("SELECT generateUUIDv4() AS new_uuid")
print(uuid_example)

# ============================================
# 7. Clean up (optional)
# ============================================
# Uncomment if you want to remove all demo tables
# client.command("DROP DATABASE IF EXISTS university_demo")
# print("üßπ Cleaned up demo database.")


üÜî Example of UUID usage:
                               new_uuid
0  0c001555-459b-4e07-a37f-b06056bf1b2b


In [8]:
client.query("""
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
    name1 [type1] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr1] [COMMENT ...] [CODEC(codec1)] [STATISTICS(stat1)] [TTL expr1] [PRIMARY KEY] [SETTINGS (name = value, ...)],
    name2 [type2] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr2] [COMMENT ...] [CODEC(codec2)] [STATISTICS(stat2)] [TTL expr2] [PRIMARY KEY] [SETTINGS (name = value, ...)],
    ...
    INDEX index_name1 expr1 TYPE type1(...) [GRANULARITY value1],
    INDEX index_name2 expr2 TYPE type2(...) [GRANULARITY value2],
    ...
    PROJECTION projection_name_1 (SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY]),
    PROJECTION projection_name_2 (SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY])
) ENGINE = MergeTree()
ORDER BY expr
[PARTITION BY expr]
[PRIMARY KEY expr]
[SAMPLE BY expr]
[TTL expr
    [DELETE|TO DISK 'xxx'|TO VOLUME 'xxx' [, ...] ]
    [WHERE conditions]
    [GROUP BY key_expr [SET v1 = aggr_func(v1) [, v2 = aggr_func(v2) ...]] ] ]
[SETTINGS name = value, ...]""")

DatabaseError: Received ClickHouse exception, code: 62, server response: Code: 62. DB::Exception: Syntax error: failed at position 15 ([) (line 2, col 14): [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
    name1 [type1] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr1] [COMMENT ...] [CODEC(codec1)] [... Expected one of: IF NOT EXISTS, compound identifier, identifier. (SYNTAX_ERROR) (version 25.10.1.2351 (official build)) (for url http://localhost:8123)