In [2]:
#@title  **Key Conceptual Domains with Significant Distributional Shifts**



# --- [Step 0: Installation and imports] ---
print("--- [Step 0: Installing and Importing Libraries] ---")
# Clone the project repository and make it the working directory
# (presumably so that the `utility` module imported below is found — confirm).
# NOTE(review): the recorded output of this cell shows the working directory
# ending up at /content/Amthal/Amthal, which suggests the cell was executed
# from inside an existing clone; the absolute data path used further down
# points at /content/Amthal/data/... — verify both refer to the same checkout.
!git clone https://github.com/NoorBayan/Amthal.git
%cd Amthal
from utility import *
from IPython.display import display, HTML, clear_output

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
#from scipy.stats import chi2_contingency

import statsmodels.api as sm


# --- [Step 1: Load and prepare the data] ---
print("--- [Step 1: Loading Data] ---")
# WARNING: make sure this path is correct in your environment.
INSTANCES_FILE_PATH = '/content/Amthal/data/processed/instances.csv'
try:
    # The instances file is read as tab-separated, UTF-16 encoded text.
    df = pd.read_csv(INSTANCES_FILE_PATH, sep='\t', encoding='utf-16')
except FileNotFoundError:
    print(f"⚠️ ERROR: File not found at '{INSTANCES_FILE_PATH}'. Please check the path and try again.")
    # Re-raise instead of calling exit(): inside an IPython/Colab kernel,
    # exit() asks the kernel to shut down rather than cleanly aborting the
    # cell, so the analysis below could still be attempted without `df`.
    raise
else:
    # Replace the stored annotation codes with display labels looked up in the
    # *_dic tables (presumably provided by `from utility import *` — confirm).
    # The concept uses its 'en' label; the other three fields use 'ar'.
    df['Dominant_Concept'] = [Dominant_Concept_dic[code]['en'] for code in df['Dominant_Concept']]
    df['Valence'] = [Valence_dic[code]['ar'] for code in df['Valence']]
    df['Revelation_Phase'] = [Revelation_Phase_dic[code]['ar'] for code in df['Revelation_Phase']]
    df['Abstraction_Level'] = [Abstraction_Level_dic[code]['ar'] for code in df['Abstraction_Level']]

    print(f"Data loaded successfully. Found {len(df)} instances.\n")



# --- 2. Compute the statistical results with statsmodels ---
print("Calculating Chi-squared results and standardized residuals using statsmodels...")

# Step A: raw contingency counts (one row per concept, one column per phase).
raw_counts = pd.crosstab(index=df['Dominant_Concept'], columns=df['Revelation_Phase'])

# Step B: hand the counts to a statsmodels contingency Table for the analysis.
# NOTE(review): Table's default shift_zeros=True adds 0.5 to every cell when
# any count is zero, which would affect the fitted values and residuals read
# below — confirm this is the intended treatment of empty cells.
table = sm.stats.Table(raw_counts)

# Expected cell counts reported by the table.
expected_df = table.fittedvalues

# Adjusted standardized residuals, one per cell.
std_residuals = table.standardized_resids

# Keep only the concepts that have at least one residual with |z| >= 1.96;
# cells below the threshold are blanked to NaN in the filtered frame.
exceeds_threshold = std_residuals.abs().ge(1.96)
significant_concepts_df = std_residuals.where(exceeds_threshold).loc[exceeds_threshold.any(axis=1)]
significant_concepts_list = significant_concepts_df.index.tolist()
print(f"Found {len(significant_concepts_list)} concepts with significant shifts.")


# --- 3. Build the final table for the significant concepts only ---
print("Building the final summary table for significant concepts...")

final_table = raw_counts.loc[significant_concepts_list].copy()
# Expected counts for the same concepts, in the same row order as final_table.
expected_sig = expected_df.loc[significant_concepts_list]

# Step A: "observed / expected" text columns for each revelation phase.
# Built positionally with zip so that an empty selection simply yields
# empty columns instead of going through a row-wise apply.
final_table['Meccan (Obs/Exp)'] = [
    f"{observed:.0f} / {expected:.1f}"
    for observed, expected in zip(final_table['مكي'], expected_sig['مكي'])
]
final_table['Medinan (Obs/Exp)'] = [
    f"{observed:.0f} / {expected:.1f}"
    for observed, expected in zip(final_table['مدني'], expected_sig['مدني'])
]

# Step B: standardized residual of the Meccan cell for every listed concept.
# Read it from the unmasked residual matrix: the filtered frame holds NaN in
# any cell that is itself below the threshold, which would leave gaps here
# whenever a concept qualified through another phase's cell.
final_table['Std. Residual'] = std_residuals.loc[significant_concepts_list, 'مكي']

# --- 4. Compute and aggregate the qualitative profiles ---
print("Aggregating qualitative profiles...")


def calculate_skew(series, *, dominant_share=0.65, top_two_share=0.8):
    """Summarise which category (or pair of categories) dominates *series*.

    Args:
        series: pandas Series of category labels; missing values are ignored
            by ``value_counts``.
        dominant_share: share the most frequent label must exceed to be
            reported on its own.
        top_two_share: combined share the two most frequent labels must
            exceed to be reported as a pair.

    Returns:
        ``'N/A'`` when there are no values, the capitalised top label when it
        exceeds ``dominant_share``, ``'Top/Second'`` when the top two labels
        together exceed ``top_two_share``, and ``'Mixed'`` otherwise.
    """
    props = series.value_counts(normalize=True)
    if props.empty:
        return 'N/A'

    # Labels are coerced to str so that non-string categories (e.g. numeric
    # codes) do not fail on .capitalize().
    top_label = str(props.index[0]).capitalize()
    if props.iloc[0] > dominant_share:
        return top_label
    if len(props) > 1 and props.iloc[0] + props.iloc[1] > top_two_share:
        second_label = str(props.index[1]).capitalize()
        return f"{top_label}/{second_label}"
    return 'Mixed'

# Restrict the instance-level data to the concepts flagged as significant.
df_sig = df[df['Dominant_Concept'].isin(significant_concepts_list)]


def _most_frequent(values):
    """Return the most frequent non-null value of *values*, or 'N/A' if none."""
    # Series.mode() ignores missing values, so an all-NaN group produces an
    # empty result even though the group itself is not empty; test the mode
    # result rather than the input to avoid indexing into nothing.
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else 'N/A'


# One profile row per concept: valence skew, most common rhetorical function,
# and abstraction-level skew.
# NOTE(review): unlike the other annotation fields, 'Rhetorical_Function' is
# not mapped to labels when the data is loaded, and the recorded output
# right-aligns this column as if it were numeric — confirm whether raw codes
# are the intended display.
profiles = df_sig.groupby('Dominant_Concept').agg(
    Valence_Profile=('Valence', calculate_skew),
    Predominant_Rhetorical_Function=('Rhetorical_Function', _most_frequent),
    Abstraction_Level=('Abstraction_Level', calculate_skew)
)
final_table = final_table.merge(profiles, on='Dominant_Concept', how='left')


# --- 5. Final formatting for display ---
# Order the concepts by residual magnitude, largest shift first. The helper
# column stays on final_table but is not part of the displayed columns.
final_table['abs_residual'] = final_table['Std. Residual'].abs()
final_table_sorted = final_table.sort_values(by='abs_residual', ascending=False)

# Arabic concept name -> English display label.
# NOTE(review): when the data is loaded, Dominant_Concept is replaced by the
# 'en' entry of Dominant_Concept_dic, so these Arabic keys may never match and
# every label would fall back to "<Capitalised> (<value>)" — confirm.
concept_map_en = {
    'طريق': 'PATH',
    'قوة': 'POWER',
    'إدراك': 'PERCEPTION',
    'خراب': 'RUIN',
    'مأوى': 'SHELTER',
    'بناء': 'BUILDING',
    'نور': 'LIGHT',
    'ماء': 'WATER',
    'حياة': 'LIFE',
    'تجارة': 'TRADE',
    'خوف': 'FEAR',
    'موت': 'DEATH',
    'ظلام': 'DARKNESS',
    'نار': 'FIRE',
    'خصائص مادية': 'MATERIALITY',
    'معاملات': 'TRANSACTION',
    'شعور': 'FEELING',
    'سلوكيات': 'BEHAVIOR',
    'علاقات اجتماعية': 'SOCIAL REL.',
    'ابتلاء': 'TRIAL',
    'تطهير وإصلاح': 'PURIFICATION',
}
final_table_sorted = final_table_sorted.reset_index()
# Render each concept as "<LABEL> (<original value>)".
final_table_sorted['Dominant_Concept'] = [
    f"{concept_map_en.get(concept, concept.capitalize())} ({concept})"
    for concept in final_table_sorted['Dominant_Concept']
]

display_columns = [
    'Dominant_Concept',
    'Meccan (Obs/Exp)',
    'Medinan (Obs/Exp)',
    'Std. Residual',
    'Valence_Profile',
    'Predominant_Rhetorical_Function',
    'Abstraction_Level',
]
final_display = final_table_sorted.loc[:, display_columns].copy()
# Show residuals with an explicit sign and two decimals.
final_display['Std. Residual'] = final_display['Std. Residual'].map(lambda z: f"{z:+.2f}")


# --- 6. Print the final table ---
# A 150-character rule frames the title and the markdown-rendered table.
separator = "=" * 150
print("\n" + separator)
print("--- Key Conceptual Domains with Significant Distributional Shifts ---")
print(separator)
print(final_display.to_markdown(index=False))
print(separator)
print("(Residuals |z| > 1.96 are significant at p < .05. Metadata columns derived from corpus annotation fields.)")

--- [Step 0: Installing and Importing Libraries] ---
Cloning into 'Amthal'...
remote: Enumerating objects: 76, done.[K
remote: Counting objects: 100% (76/76), done.[K
remote: Compressing objects: 100% (64/64), done.[K
remote: Total 76 (delta 12), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (76/76), 1.47 MiB | 3.55 MiB/s, done.
Resolving deltas: 100% (12/12), done.
/content/Amthal/Amthal
--- [Step 1: Loading Data] ---
Data loaded successfully. Found 4078 instances.

Calculating Chi-squared results and standardized residuals using statsmodels...
Found 5 concepts with significant shifts.
Building the final summary table for significant concepts...
Aggregating qualitative profiles...

--- Key Conceptual Domains with Significant Distributional Shifts ---
| Dominant_Concept                    | Meccan (Obs/Exp)   | Medinan (Obs/Exp)   |   Std. Residual | Valence_Profile   |   Predominant_Rhetorical_Function | Abstraction_Level   |
|:------------------------------