In [None]:

#@title  **Summary of Significantly Shifting Concepts**


# --- إعداد عام ---
!git clone https://github.com/NoorBayan/Amthal.git
%cd Amthal

from utility import *
from IPython.display import display, HTML, clear_output
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
from itertools import combinations
import io
import statsmodels.api as sm
from matplotlib.lines import Line2D

import warnings

# Suppress warnings whose message mentions font_manager.
warnings.filterwarnings("ignore", message=".*font_manager.*")

# Pin both the font family and the serif fallback list to "DejaVu Serif"
# (chosen by the original author as a font expected to be present wherever
# matplotlib is installed).
plt.rcParams.update({
    "font.family": "DejaVu Serif",
    "font.serif": ["DejaVu Serif"],
})

# --- [Step 1: load and prepare the data] ---
INSTANCES_FILE_PATH = '/content/Amthal/data/processed/instances.csv'
try:
    # Only the read sits inside the try, so a failure in the decoding step
    # below can never be confused with a missing file.
    df = pd.read_csv(INSTANCES_FILE_PATH, sep='\t', encoding='utf-16')
except FileNotFoundError:
    print(f"⚠️ ERROR: File not found at '{INSTANCES_FILE_PATH}'. Please check the path and try again.")
    # Re-raise instead of calling exit(): exit() targets the interpreter/kernel
    # session itself, whereas re-raising stops this cell at the point of
    # failure and keeps the traceback.
    raise

# Replace each stored value with a label from the lookup tables brought in by
# `from utility import *`: the 'en' label for the concept, the 'ar' label for
# the other three columns. An unknown value raises KeyError, as before.
for column, lookup, label_key in (
    ('Dominant_Concept', Dominant_Concept_dic, 'en'),
    ('Valence', Valence_dic, 'ar'),
    ('Revelation_Phase', Revelation_Phase_dic, 'ar'),
    ('Abstraction_Level', Abstraction_Level_dic, 'ar'),
):
    df[column] = [lookup[value][label_key] for value in df[column]]
print(f"Data loaded successfully. Found {len(df)} instances.\n")




# --- 2. Compute the statistical results ---
print("Calculating Chi-squared results and standardized residuals using statsmodels...")
# Concept x revelation-phase contingency table of observed counts.
raw_counts = pd.crosstab(df['Dominant_Concept'], df['Revelation_Phase'])
table = sm.stats.Table(raw_counts)
# NOTE(review): statsmodels' Table has a shift_zeros option that adjusts the
# counts when a cell is zero; if that applies here, the expected values come
# from the adjusted table while the observed values shown later come from
# raw_counts. Confirm against the statsmodels documentation.
expected_df = table.fittedvalues
std_residuals = table.standardized_resids

# Keep residuals whose magnitude reaches 1.96, blank the others to NaN, then
# discard concepts for which no phase reaches the cutoff.
reaches_cutoff = std_residuals.abs() >= 1.96
significant_concepts_df = std_residuals.where(reaches_cutoff).dropna(how='all')
significant_concepts_list = list(significant_concepts_df.index)
print(f"Found {len(significant_concepts_list)} concepts with significant shifts.")


# --- 3. Build the final table for the significant concepts only ---
print("Building the final summary table for significant concepts...")
final_table = raw_counts.loc[significant_concepts_list].copy()

# Format the "observed / expected" columns: observed count with no decimals,
# expected count with one decimal, one column per revelation phase.
for phase_ar, column_label in (
    ('مكي', 'Meccan (Obs/Exp)'),
    ('مدني', 'Medinan (Obs/Exp)'),
):
    observed = raw_counts.loc[significant_concepts_list, phase_ar]
    expected = expected_df.loc[significant_concepts_list, phase_ar]
    final_table[column_label] = [
        f"{obs:.0f} / {exp:.1f}" for obs, exp in zip(observed, expected)
    ]

# Read the Meccan residual from the unmasked residual table.
# significant_concepts_df holds NaN wherever an individual residual is below
# the cutoff, so a concept kept because of another phase would otherwise show
# NaN here. Values are identical wherever the masked entry was not NaN.
final_table['Std. Residual'] = std_residuals.loc[significant_concepts_list, 'مكي']


# --- 4. Compute and aggregate the qualitative properties (with built-in translation) ---
print("Aggregating qualitative profiles...")

# >> Fix 1: the translation dictionaries are defined here <<
# Each dictionary maps an Arabic annotation label to its English display label.
valence_map = {
    'إيجابي': 'Positive',
    'سلبي': 'Negative',
    'محايد': 'Neutral',
}
abs_level_map = {
    'محسوس': 'Concrete',
    'مجرد': 'Abstract',
}
function_map = {
    'امتنان': 'Gratitude',
    'تشريع': 'Legislation',
    'تعظيم': 'Glorification',
    'تقريب': 'Approximation',
    'تمجيد': 'Exaltation',
    'حجاج': 'Argumentation',
    'سرد': 'Narration',
    'وصف': 'Description',
    'وعد': 'Promise',
    'وعيد': 'Warning',
}

# Helper that summarises the "skew" of a categorical column, translating labels on the fly
def calculate_skew_translated(series, translation_map):
    """Describe which category (or pair of categories) dominates *series*.

    Category shares are taken from ``series.value_counts(normalize=True)``.
    Labels are looked up in *translation_map* (falling back to the raw label)
    and passed through ``str.capitalize`` before being returned.

    Returns:
        ``'N/A'`` when there are no counted values; the top label when its
        share exceeds 0.65; ``'Top/Second'`` when the two largest shares
        together exceed 0.8; otherwise ``'Mixed'``.
    """
    shares = series.value_counts(normalize=True)
    if shares.empty:
        return 'N/A'

    # Translate the leading category up front, before any threshold is checked.
    top_key = shares.index[0]
    top_label = translation_map.get(top_key, top_key)
    top_share = shares.iloc[0]

    if top_share > 0.65:
        return top_label.capitalize()

    has_runner_up = len(shares) > 1
    if has_runner_up and (top_share + shares.iloc[1]) > 0.8:
        runner_key = shares.index[1]
        runner_label = translation_map.get(runner_key, runner_key)
        return f"{top_label.capitalize()}/{runner_label.capitalize()}"

    return 'Mixed'


# Restrict the instances to the concepts flagged as significant.
df_sig = df[df['Dominant_Concept'].isin(significant_concepts_list)]


def _predominant_function(values):
    """Return the English label of the most frequent rhetorical function.

    Returns 'N/A' when the group has no countable value (``mode()`` comes back
    empty, e.g. every entry is missing) or when the most frequent value has no
    entry in ``function_map``. Checking the mode result itself is what guards
    the lookup; a groupby group is never empty, so testing the group's
    emptiness would not.
    """
    modes = values.mode()
    if modes.empty:
        return 'N/A'
    # mode() returns its values sorted, so ties resolve to the first one.
    return function_map.get(modes.iloc[0], 'N/A')


# One row per concept: valence skew, predominant rhetorical function and
# abstraction-level skew.
profiles = df_sig.groupby('Dominant_Concept').agg(
    Valence_Profile=('Valence', lambda x: calculate_skew_translated(x, valence_map)),
    Predominant_Rhetorical_Function=('Rhetorical_Function', _predominant_function),
    Abstraction_Level=('Abstraction_Level', lambda x: calculate_skew_translated(x, abs_level_map)),
)

# Attach the profiles to the summary table, keeping every summary row.
final_table = final_table.merge(profiles, on='Dominant_Concept', how='left')


# --- 5. Final formatting for display ---
# Order rows by residual magnitude, largest first; the helper column stays on
# final_table and is left out of the displayed columns below.
final_table['abs_residual'] = final_table['Std. Residual'].abs()
final_table_sorted = final_table.sort_values(by='abs_residual', ascending=False).reset_index()

# NOTE(review): the keys below are Arabic, while step 1 stores the 'en' label
# in Dominant_Concept. If those labels are English, every lookup falls through
# to the capitalize() default — confirm against Dominant_Concept_dic.
concept_map_ar_to_en = {
    'طريق': 'PATH', 'قوة': 'POWER', 'إدراك': 'PERCEPTION', 'خراب': 'RUIN', 'مأوى': 'SHELTER', 'بناء': 'BUILDING',
    'نور': 'LIGHT', 'ماء': 'WATER', 'حياة': 'LIFE', 'تجارة': 'TRADE', 'خوف': 'FEAR', 'موت': 'DEATH',
    'ظلام': 'DARKNESS', 'نار': 'FIRE', 'خصائص مادية': 'MATERIALITY', 'معاملات': 'TRANSACTION', 'شعور': 'FEELING',
    'سلوكيات': 'BEHAVIOR', 'علاقات اجتماعية': 'SOCIAL REL.', 'ابتلاء': 'TRIAL', 'تطهير وإصلاح': 'PURIFICATION'
}
# Render each concept as "<mapped or capitalized name> (<original name>)".
final_table_sorted['Dominant_Concept'] = [
    f"{concept_map_ar_to_en.get(concept, concept.capitalize())} ({concept})"
    for concept in final_table_sorted['Dominant_Concept']
]

display_columns = [
    'Dominant_Concept',
    'Meccan (Obs/Exp)',
    'Medinan (Obs/Exp)',
    'Std. Residual',
    'Valence_Profile',
    'Predominant_Rhetorical_Function',
    'Abstraction_Level',
]
final_display = final_table_sorted.loc[:, display_columns].copy()
# Signed, two-decimal text form of the residual.
final_display['Std. Residual'] = final_display['Std. Residual'].apply(
    lambda residual: f"{residual:+.2f}"
)


# --- 6. Show the final table ---
# A 150-character rule frames the title and the markdown table.
rule = "=" * 150
print(f"\n{rule}")
print("--- Table 2: Key Conceptual Domains with Significant Distributional Shifts ---")
print(rule)
print(final_display.to_markdown(index=False))
print(rule)
print("(Residuals |z| > 1.96 significant at p < .05. Metadata columns derived from corpus annotation fields.)")

Cloning into 'Amthal'...
remote: Enumerating objects: 101, done.
remote: Counting objects: 100% (101/101), done.
remote: Compressing objects: 100% (89/89), done.
remote: Total 101 (delta 14), reused 0 (delta 0), pack-reused 0 (from 0)
Receiving objects: 100% (101/101), 2.84 MiB | 5.63 MiB/s, done.
Resolving deltas: 100% (14/14), done.
/content/Amthal/Amthal/Amthal/Amthal/Amthal/Amthal/Amthal/Amthal/Amthal/Amthal/Amthal/Amthal
Data loaded successfully. Found 4078 instances.

Calculating Chi-squared results and standardized residuals using statsmodels...
Found 5 concepts with significant shifts.
Building the final summary table for significant concepts...
Aggregating qualitative profiles...

--- Table 2: Key Conceptual Domains with Significant Distributional Shifts ---
| Dominant_Concept                    | Meccan (Obs/Exp)   | Medinan (Obs/Exp)   |   Std. Residual | Valence_Profile   | Predominant_Rhetorical_Function   | Abstraction_Level   |
|:-----------------------------