In [1]:
import sqlite3
import pandas as pd
from pathlib import Path
from datetime import datetime

# Resolve the notebooks directory: use the working directory itself when the
# kernel was started inside 'notebooks', otherwise its 'notebooks' child.
notebooks_dir = Path.cwd()
if notebooks_dir.name != 'notebooks':
    notebooks_dir = notebooks_dir / 'notebooks'

output_dir = notebooks_dir / 'output'
sql_file = notebooks_dir / 'database.sql'
output_dir.mkdir(parents=True, exist_ok=True)

# Start from a clean scratch database on every run so the SQL dump is never
# replayed on top of tables left over from a previous execution.
temp_db = output_dir / 'temp_database.db'
temp_db.unlink(missing_ok=True)

# Replay the whole SQL dump into the fresh SQLite file.
conn = sqlite3.connect(str(temp_db))
conn.executescript(sql_file.read_text(encoding='utf-8'))
conn.commit()
print("DB loaded")

DB loaded


In [2]:
# Build one row per student that appears in at least one registration, together
# with the student's school name and a contact number taken from a verified
# faculty member of the same school.
#
# How the SQL below works:
#   - The JOIN on registration_participants (participant_type = 'student') keeps
#     only students that are part of a registration; SELECT DISTINCT collapses
#     the duplicate rows that join produces for students in several registrations.
#   - The correlated subquery returns the mobile_number of the first faculty row
#     for the student's school with is_verified = 1, ordered by created_at
#     ascending; it evaluates to NULL when the school has no such faculty row.
#   - NOTE(review): the subquery has no tie-breaker after created_at, so when two
#     verified faculty rows share the same created_at the chosen one is up to
#     SQLite — confirm whether that matters for this export.
#   - Student names are upper-cased in SQL; rows are sorted by school name, then
#     by the original (non-upper-cased) student name.
query = """
SELECT DISTINCT
    UPPER(s.student_name) as name_of_student,
    s.student_id,
    sc.name as school_name,
    (
        SELECT f.mobile_number 
        FROM faculty f 
        WHERE f.school_id = s.school_id 
        AND f.is_verified = 1 
        ORDER BY f.created_at 
        LIMIT 1
    ) as mobile_number
FROM students s
JOIN schools sc ON s.school_id = sc.id
JOIN registration_participants rp ON s.id = rp.participant_id AND rp.participant_type = 'student'
ORDER BY sc.name, s.student_name
"""

# Run the query on the connection opened in the first cell and load the result
# into a DataFrame.
df = pd.read_sql_query(query, conn)
# Report the row count and preview the first rows as a sanity check.
print(f"Total students participating in events: {len(df)}")
display(df.head())

Total students participating in events: 1099


Unnamed: 0,name_of_student,student_id,school_name,mobile_number
0,ABHINAV KRISHNA U,STU-C2PKPXB4ZF,"Adarsh Special School, Kureekkad",9447391730
1,ALWIN THOMAS,STU-X4UUT49FTV,"Adarsh Special School, Kureekkad",9447391730
2,ANJANA P R,STU-T_O_QV56MZ,"Adarsh Special School, Kureekkad",9447391730
3,ANSIYA MOL,STU-IVARJ-FGAN,"Adarsh Special School, Kureekkad",9447391730
4,ANUSREE C,STU-FFUPZYHRJC,"Adarsh Special School, Kureekkad",9447391730


In [3]:
# Format phone number as +91 xxxxxxxxxx
def format_phone_number(phone):
    """Normalize an Indian mobile number to the form ``+91 xxxxxxxxxx``.

    Spaces and dashes are removed first; only then is a leading country code
    (``+91``, or a bare ``91`` when more than ten characters remain) dropped.
    Doing it in this order keeps a ten-digit number that merely starts with
    ``91`` and contains a separator (e.g. ``"91447 39173"``) from being
    mistaken for a number carrying a country code and truncated.

    Args:
        phone: Raw phone value. May be a string, an int, a float, or a
            missing value (``None`` / NaN).

    Returns:
        ``None`` for a missing value; ``"+91 "`` followed by the ten digits
        when the cleaned value is exactly ten digits; otherwise the cleaned
        string (separators and any recognized country code removed).
    """
    if phone is None or pd.isna(phone):
        return None

    # A whole-number float such as 9447391730.0 would stringify with a
    # trailing ".0" and fail the digit check below; pandas may produce such
    # floats when a numeric column also contains missing values.
    if isinstance(phone, float) and phone.is_integer():
        phone = int(phone)

    # Strip separators before inspecting the prefix so the length test below
    # counts digits only.
    cleaned = str(phone).strip().replace(' ', '').replace('-', '')

    # Drop an existing country code.
    if cleaned.startswith('+91'):
        cleaned = cleaned[3:]
    elif cleaned.startswith('91') and len(cleaned) > 10:
        cleaned = cleaned[2:]

    if len(cleaned) == 10 and cleaned.isdigit():
        return f"+91 {cleaned}"
    # Unexpected format: hand back the cleaned string without a prefix.
    return cleaned

# Add the formatted faculty phone number as a new column on the query result.
df['phone_number_of_faculty'] = df['mobile_number'].apply(format_phone_number)

# Keep only the export columns, copied so later changes cannot touch `df`.
df_clean = df.loc[
    :,
    ['name_of_student', 'student_id', 'school_name', 'phone_number_of_faculty'],
].copy()

# Report the row count and how many rows have a non-missing phone number.
print(f"Cleaned data: {df_clean.shape[0]} students")
print(f"Students with phone numbers: {df_clean['phone_number_of_faculty'].count()}")
display(df_clean.head(10))

Cleaned data: 1099 students
Students with phone numbers: 1099


Unnamed: 0,name_of_student,student_id,school_name,phone_number_of_faculty
0,ABHINAV KRISHNA U,STU-C2PKPXB4ZF,"Adarsh Special School, Kureekkad",+91 9447391730
1,ALWIN THOMAS,STU-X4UUT49FTV,"Adarsh Special School, Kureekkad",+91 9447391730
2,ANJANA P R,STU-T_O_QV56MZ,"Adarsh Special School, Kureekkad",+91 9447391730
3,ANSIYA MOL,STU-IVARJ-FGAN,"Adarsh Special School, Kureekkad",+91 9447391730
4,ANUSREE C,STU-FFUPZYHRJC,"Adarsh Special School, Kureekkad",+91 9447391730
5,ANVIKA ANEESH,STU-DQR82TF_PX,"Adarsh Special School, Kureekkad",+91 9447391730
6,APARNA SASIKUMAR,STU-L79UMJUXPI,"Adarsh Special School, Kureekkad",+91 9447391730
7,DEVIKA V R,STU-HOVEZZS3EN,"Adarsh Special School, Kureekkad",+91 9447391730
8,DONNIE BIJU,STU-R7CTK913_F,"Adarsh Special School, Kureekkad",+91 9447391730
9,EVAN SAJI MATHEW,STU-W3CA9NKCRH,"Adarsh Special School, Kureekkad",+91 9447391730


In [4]:
# Generate Excel file with formatting
from openpyxl.styles import Font, PatternFill, Alignment
from openpyxl.utils import get_column_letter

# Timestamped file name so repeated exports never overwrite each other.
excel_file = output_dir / f'student_export_{datetime.now().strftime("%Y%m%d_%H%M%S")}.xlsx'

with pd.ExcelWriter(excel_file, engine='openpyxl') as writer:
    df_clean.to_excel(writer, sheet_name='Students', index=False)
    worksheet = writer.sheets['Students']

    # Header row: white bold text on a blue fill, centered and wrapped.
    header_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
    header_font = Font(bold=True, color="FFFFFF", size=11)
    header_alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
    for header_cell in worksheet[1]:
        header_cell.fill = header_fill
        header_cell.font = header_font
        header_cell.alignment = header_alignment

    # Column widths: longest of the header text and the cell values, plus two
    # characters of padding, clamped to the range 10..50.
    has_rows = len(df_clean) > 0
    for position, column_name in enumerate(df_clean.columns, start=1):
        longest_value = df_clean[column_name].astype(str).map(len).max() if has_rows else 0
        widest = max(len(str(column_name)), longest_value)
        target = worksheet.column_dimensions[get_column_letter(position)]
        target.width = min(max(widest + 2, 10), 50)

    # Keep the header visible while scrolling and give it a taller row.
    worksheet.freeze_panes = "A2"
    worksheet.row_dimensions[1].height = 25

print(f"\nExcel file saved: {excel_file}")
print(f"Total students exported: {len(df_clean)}")

# The export is the last consumer of the database connection.
conn.close()


Excel file saved: /Users/arjunkrishna/Developer/Devmorphix/chilamboli/notebooks/output/student_export_20260127_044907.xlsx
Total students exported: 1099
