In [None]:
"""
YKS Analyzer System - Main Notebook
"""

# ============================================================================
# CELL 1: IMPORT NECESSARY MODULES
# ============================================================================

# Standard library
import json
import os
from pathlib import Path

# Third-party
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from dotenv import load_dotenv

# Custom Modules
from data_loader import GoogleSheetsLoader
from data_cleaner import DataCleaner
from analysis.net_analyzer import NetAnalyzer
from analysis.topic_analyzer import TopicAnalyzer
from visualization.net_charts import NetVisualizer
from visualization.topic_charts import TopicVisualizer
from config import Config

# Reaching this line means every import in this cell resolved without error;
# the message is only a quick sanity signal for the reader.
print("✅ All modules imported successfully.")



In [None]:
# ============================================================================
# CELL 2: Configuration and Initialization
# ============================================================================
# Loads the .env settings, selects the exam type to analyse and builds the
# loader / cleaner / analyzer / visualizer objects used by the later cells.

# Import .env variables
load_dotenv()

config = Config()
Config.validate_config()

# Print configuration summary
SHEET_URL = os.getenv("GOOGLE_SHEET_URL")
CREDENTIALS_PATH = os.getenv("CREDENTIALS_PATH")
print("📄 Google Sheets URL:", SHEET_URL)
print("🔑 Credentials Path:", CREDENTIALS_PATH)

EXAM_TYPE = "AYT"  # Select "TYT" or "AYT" here.

# Look up the default target for the selected exam type. A typo in EXAM_TYPE
# would otherwise surface as a bare KeyError, so translate it into an error
# that tells the reader what to change.
try:
    TARGET_NET = config.Analysis.DEFAULT_TARGET_NET[EXAM_TYPE]
except KeyError:
    raise ValueError(
        f"Unsupported EXAM_TYPE {EXAM_TYPE!r}: no default target net is "
        'configured for it. Select "TYT" or "AYT".'
    ) from None

print(f"📊 Exam Type: {EXAM_TYPE}")
print(f"🎯 Target Net: {TARGET_NET}")

# Initialize main objects
loader = GoogleSheetsLoader(SHEET_URL, CREDENTIALS_PATH)
cleaner = DataCleaner(config, strict_mode=False, auto_fix=True)
net_analyzer = NetAnalyzer(config, exam_type=EXAM_TYPE)
topic_analyzer = TopicAnalyzer(config, exam_type=EXAM_TYPE)
net_viz = NetVisualizer(config)
topic_viz = TopicVisualizer(config)

print("\n✅ All main objects are created!")

In [None]:
# ============================================================================
# CELL 3: Load Data
# ============================================================================
# Fetches the raw exam records for the selected EXAM_TYPE and previews them.

print("\n📥 Loading data...")

# Map every supported exam type to its loader method. An unexpected EXAM_TYPE
# now fails loudly instead of silently falling through to the AYT loader.
exam_loaders = {
    "TYT": loader.load_tyt_data,
    "AYT": loader.load_ayt_data,
}
if EXAM_TYPE not in exam_loaders:
    raise ValueError(
        f"Unsupported EXAM_TYPE {EXAM_TYPE!r}; expected one of {sorted(exam_loaders)}."
    )
raw_data = exam_loaders[EXAM_TYPE]()

print(f"✅ {len(raw_data)} records loaded from Google Sheets.")
print("\n📋 First three rows:")
display(raw_data.head(3))

In [None]:
# ============================================================================
# CELL 4: Data Cleaning
# ============================================================================
# Runs the raw records through DataCleaner, appends the derived features and
# prints a short summary of what the cleaner reported.

print("\n🧹 Cleaning Data...")

# Fresh cleaner for this run (non-strict mode, automatic fixing enabled)
cleaner = DataCleaner(config, strict_mode=False, auto_fix=True)

# Inner call cleans the full dataset; outer call adds the derived features
cleaned_data = cleaner.add_derived_features(
    cleaner.clean_full_dataset(raw_data, exam_type=EXAM_TYPE),
    exam_type=EXAM_TYPE,
)

# Summarise the cleaning report (one line per reported figure)
report = cleaner.get_cleaning_report()
print(
    "\n📊 Cleaning Raporu:",
    f"  • Deleted rows: {report['rows_removed']}",
    f"  • Fixed values: {report['values_fixed']}",
    f"  • Warnings: {len(report['warnings'])}",
    sep="\n",
)

print(f"\n✅ Cleaning completeı: {len(cleaned_data)} Exam(s)")



In [None]:
# ============================================================================
# CELL 5: Net Analysis
# ============================================================================
# Prints overall statistics, the progression trend, per-subject statistics,
# the weakest / strongest subjects and the comparison against TARGET_NET.

print("\n📊 Analyzing nets...")

# Analyzer bound to the selected exam type
net_analyzer = NetAnalyzer(config, exam_type=EXAM_TYPE)


def _print_subject_ranking(subject_infos, limit=3):
    """Print one bullet (mean net and trend) for the first `limit` entries."""
    for info in subject_infos[:limit]:
        print(f"  • {info['subject']}: {info['mean']:.1f} net (Trend: {info['trend']})")


# --- Statistics of the 'Toplam Net' column --------------------------------
print("\n=== TOTAL NET STATISTICS ===")
total_stats = net_analyzer.calculate_statistics(cleaned_data, 'Toplam Net')
for stat_name, stat_value in total_stats.items():
    # Floats are shown with two decimals; everything else is printed as-is
    if isinstance(stat_value, float):
        print(f"  {stat_name}: {stat_value:.2f}")
    else:
        print(f"  {stat_name}: {stat_value}")

# --- Progression trend of the 'Toplam Net' column --------------------------
print("\n=== TREND ANALYSIS ===")
trend = net_analyzer.get_progression_trend(cleaned_data, 'Toplam Net')
print(
    f"  Trend: {trend['trend']}",
    f"  Slope: {trend['slope']:.2f} net/deneme",
    f"  R²: {trend['r_squared']:.3f}",
    f"  Total improvement: {trend['total_improvement']:.1f} net",
    f"  Next prediction: {trend['next_prediction']:.1f} net",
    sep="\n",
)

# --- Per-subject statistics ------------------------------------------------
print("\n=== SUBJECT BASED STATISTICS ===")
all_stats = net_analyzer.get_all_subjects_statistics(cleaned_data)
print(all_stats[['mean', 'std', 'min', 'max']].round(2))

# --- First three weak and strong subjects ----------------------------------
print("\n=== WEAK SUBJECTS ===")
weak = net_analyzer.identify_weak_subjects(cleaned_data)
_print_subject_ranking(weak)

print("\n=== STRONG SUBJECTS ===")
strong = net_analyzer.identify_strong_subjects(cleaned_data)
_print_subject_ranking(strong)

# --- Distance to the configured target -------------------------------------
print("\n=== COMPARISON TO TARGET ===")
target_comparison = net_analyzer.compare_to_target(cleaned_data, TARGET_NET)
print(
    f"  Target: {target_comparison['target']}",
    f"  Current: {target_comparison['current']:.1f}",
    f"  Gap: {target_comparison['gap']:.1f} net",
    f"  Status: {target_comparison['status']}",
    sep="\n",
)
# The expected exam count is only shown when the analyzer returned a truthy value
if target_comparison['exams_needed']:
    print(f"  Expected exam count: {target_comparison['exams_needed']}")



In [None]:
# ============================================================================
# CELL 6: Subject Analysis
# ============================================================================
# Lists the most problematic topics, the weak areas per subject and a
# suggested study plan built from the cleaned exam data.

print("\n📚 Subject analysis being made...")

topic_analyzer = TopicAnalyzer(config, exam_type=EXAM_TYPE)

# Precomputation step: the result is handed to generate_study_plan() below.
# NOTE(review): this calls a leading-underscore (private by convention)
# method of TopicAnalyzer; consider exposing a public entry point.
precomputed_topic_trends = topic_analyzer._precompute_all_topic_trends(cleaned_data)

# Ten most problematic topics, numbered from 1
print("\n=== MOST PROBLEMATIC 10 TOPIC ===")
problematic = topic_analyzer.get_most_problematic_topics(cleaned_data, top_n=10)
for rank, (topic_name, times_seen) in enumerate(problematic, start=1):
    print(f"  {rank}. {topic_name}: {times_seen} times")

# Weak areas grouped by subject; only the first two topics of each are shown
print("\n=== WEAK AREAS (SUBJECT BASED) ===")
weak_areas = topic_analyzer.identify_weak_areas(cleaned_data, threshold=3)
for subject_name, weak_topics in weak_areas.items():
    print(
        f"\n{subject_name}:",
        *(f"  • {weak_topic}" for weak_topic in weak_topics[:2]),
        sep="\n",
    )

# Study plan: at most three topics per subject
print("\n=== SUGGESTED STUDY PLAN ===")
study_plan = topic_analyzer.generate_study_plan(
    cleaned_data,
    precomputed_topic_trends,
    max_topics_per_subject=3,
)
for subject_name, plan_items in study_plan.items():
    print(f"\n{subject_name}:")
    for entry in plan_items:
        print(
            f"  {entry['order']}. {entry['topic']}",
            f"     Priority: {entry['priority']} | Frequency: {entry['frequency']} | {entry['recent_status']}",
            sep="\n",
        )

In [None]:
# ============================================================================
# CELL 7: Net Graphics
# ============================================================================
# Renders and saves the net charts (total trend, subject comparison,
# dashboard and multi-subject comparison) under output/charts.

# Analysis results gathered up front.
# NOTE(review): these four names are not referenced again in the visible
# cells; they appear to be kept for interactive inspection.
all_net_stats = net_analyzer.get_all_subjects_statistics(cleaned_data)
improvement_data = net_analyzer.calculate_improvement_rate(cleaned_data, window=3)
most_problematic = topic_analyzer.get_most_problematic_topics(cleaned_data, top_n=15)
subject_comparison_topics = topic_analyzer.compare_subjects_by_topics(cleaned_data)

print("\n📈 Generating net graphics...")

# Visualizer used for every chart in this cell
net_viz = NetVisualizer(config)

# Output folder for the charts (created on demand, parents included)
charts_dir = Path("output/charts")
charts_dir.mkdir(parents=True, exist_ok=True)

# Chart 1: total nets by exam
fig1 = net_viz.plot_total_nets_by_exam(cleaned_data, save_path=charts_dir / "total_net.png")
plt.show()

# Chart 2: comparison of all subjects
fig2 = net_viz.plot_all_subjects_comparison(cleaned_data, save_path=charts_dir / "subject_comparison.png")
plt.show()

# Dashboard for the selected exam type
net_viz.dashboard(cleaned_data, exam_type=EXAM_TYPE)

# Chart 4: multi-line comparison over the first four columns whose name ends
# with 'Net' (slicing already yields the whole list when it has fewer than four)
net_columns = [column for column in cleaned_data.columns if column.endswith('Net')]
fig4 = net_viz.plot_multi_subject_comparison(
    cleaned_data,
    net_columns[:4],
    save_path=charts_dir / "all_subjects_multi_comparison.png",
)
plt.show()

print(f"✅ Saved bet graphics: {charts_dir}")



In [None]:
# ============================================================================
# CELL 8: Topic Graphics
# ============================================================================
# Plots the most problematic topics of the three latest exams and the trend
# of one specific topic. Relies on `charts_dir` created in the net graphics cell.

print("\n📊 Generating topic graphics...")

# Visualizer used for the topic charts
topic_viz = TopicVisualizer(config)

# Restrict to the three most recent exams (rows ordered by the 'Tarih' column)
last3_data = cleaned_data.sort_values(by='Tarih').tail(3)
problematic_last3 = topic_analyzer.get_most_problematic_topics(last3_data, top_n=15)

fig8 = topic_viz.plot_total_wrong_topics(
    problematic_topics=problematic_last3,
    top_n=15,
    save_path=charts_dir / "most_problematic_topics.png",
)
plt.show()

# Trend of a single topic; a falsy return value means nothing was drawn
fig = topic_viz.plot_topic_trend_by_exam(cleaned_data, 'Matematik', 'türev')
if not fig:
    print("Not enaough data for the graphic or the names doesnt match.")
else:
    plt.show()

print(f"✅ Saved subject graphics: {charts_dir}")

In [None]:
# ============================================================================
# CELL 9: Final Summary Report
# ============================================================================
# Builds the net and topic summary reports and prints the headline figures.

print("\n📊 Generating final summary report...")
net_report = net_analyzer.generate_summary_report(cleaned_data)
# NOTE(review): topic_report is not referenced again in the visible cells.
topic_report = topic_analyzer.generate_topic_summary_report(cleaned_data)


def _format_report_date(value):
    """Format a report date as YYYY-MM-DD, or return 'N/A' when it is missing.

    The previous inline code fell back to ``pd.NaT`` and then called
    ``.strftime`` on it, which raises ``ValueError`` - so the fallback path
    itself crashed the cell. Missing values (None / NaT / NaN) are rendered
    as 'N/A' instead; values without ``strftime`` are shown via ``str()``.
    """
    if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
        return "N/A"
    if hasattr(value, "strftime"):
        return value.strftime('%Y-%m-%d')
    return str(value)


# A missing 'date_range' entry is treated as an empty range
date_range = net_report.get('date_range', {})
first_exam_date = _format_report_date(date_range.get('first'))
last_exam_date = _format_report_date(date_range.get('last'))

print(f"\n📊 General Information:")
print(f"  • Exam Type: {EXAM_TYPE}")
print(f"  • Total Exam Count: {net_report.get('total_exams', 'N/A')}")
print(f"  • Date Range: {first_exam_date} -> {last_exam_date}")

print(f"\n🎯 Net Performance:")
print(f"  • Mean of Total Net: {net_report['overall_stats'].get('mean', 0):.2f}")
print(f"  • Highest Total Net: {net_report['overall_stats'].get('max', 0):.2f}")
print(f"  • Latest Exam Total Net: {net_report['overall_stats'].get('latest', 0):.2f}")
print(f"  • Improvement: {net_report['recent_improvement'].get('interpretation', 'N/A')}")


print("\n🎉 Analysis Complete!")

In [None]:
# ============================================================================
# CELL 10: Save data (Optional)
# ============================================================================
# Persists the cleaned dataset as CSV and the net summary report as JSON
# under output/data. `json` is imported in the notebook's import cell.

output_data_dir = Path("output/data")
output_data_dir.mkdir(parents=True, exist_ok=True)

# Save cleaned data as csv
cleaned_csv_path = output_data_dir / f"{EXAM_TYPE}_cleaned_data.csv"
cleaned_data.to_csv(cleaned_csv_path, index=False)
print(f"\n💾 Cleaned data saved successfully: {cleaned_csv_path}")


def _to_jsonable(obj):
    """Recursively turn `obj` into a structure that json can serialise.

    * DataFrames (at any depth) become plain dicts via ``to_dict()``.
    * Dict keys that json rejects (anything other than str/int/float/bool/None,
      e.g. Timestamps coming from a DataFrame index) are converted with
      ``str()``, because json's ``default=`` hook is never applied to keys.
    * Lists and tuples are walked so nested dicts get the same treatment.

    Every other value is returned unchanged and left to ``default=str``.
    """
    if isinstance(obj, pd.DataFrame):
        return _to_jsonable(obj.to_dict())
    if isinstance(obj, dict):
        return {
            (key if key is None or isinstance(key, (str, int, float, bool)) else str(key)):
                _to_jsonable(value)
            for key, value in obj.items()
        }
    if isinstance(obj, (list, tuple)):
        return [_to_jsonable(item) for item in obj]
    return obj


# Save summary reports as JSON.
# The payload is serialised to a string first, so a serialisation error
# cannot leave a half-written report file behind.
net_report_path = output_data_dir / f"{EXAM_TYPE}_net_report.json"
report_to_save = _to_jsonable(net_report)
report_json = json.dumps(report_to_save, ensure_ascii=False, indent=2, default=str)
net_report_path.write_text(report_json, encoding='utf-8')

print(f"💾 Net report saved successfully: {net_report_path}")

print("\n🎉 All processes completed!")