In [5]:
#@title  **Top standardized residuals for Valence–Rhetorical Function associations**


# --- إعداد عام ---
!git clone https://github.com/NoorBayan/Amthal.git
%cd Amthal

from utility import *
from IPython.display import display, HTML, clear_output
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
from itertools import combinations
import io
import statsmodels.api as sm
from matplotlib.lines import Line2D


import warnings

# Silence warnings whose message mentions the matplotlib font manager.
warnings.filterwarnings(action="ignore", message=".*font_manager.*")

# Select a font that is available in every matplotlib environment.
plt.rcParams.update({
    "font.family": "DejaVu Serif",
    "font.serif": ["DejaVu Serif"],
})

# --- [Step 1: load and prepare the data] ---
# The instances file is read as tab-separated, UTF-16 encoded text.
INSTANCES_FILE_PATH = '/content/Amthal/data/processed/instances.csv'
try:
    df = pd.read_csv(INSTANCES_FILE_PATH, sep='\t', encoding='utf-16')
except FileNotFoundError:
    print(f"⚠️ ERROR: File not found at '{INSTANCES_FILE_PATH}'. Please check the path and try again.")
    # Re-raise instead of calling exit(): the cell stops with a clear traceback
    # without terminating the notebook kernel, and a plain-script run no longer
    # exits with a success status after a failure.
    raise

# Replace the stored codes with human-readable labels. The *_dic lookup tables
# are not defined in this cell (presumably provided by `from utility import *`
# -- verify). Each entry is indexed first by code and then by language key.
_label_sources = {
    'Dominant_Concept': (Dominant_Concept_dic, 'en'),
    'Valence': (Valence_dic, 'ar'),
    'Revelation_Phase': (Revelation_Phase_dic, 'ar'),
    'Abstraction_Level': (Abstraction_Level_dic, 'ar'),
    'Rhetorical_Function': (Rhetorical_Function_dic, 'ar'),
}
for _column, (_lookup, _lang) in _label_sources.items():
    df[_column] = [_lookup[code][_lang] for code in df[_column]]

print(f"Data loaded successfully. Found {len(df)} instances.\n")




# --- 2. Contingency table and standardized residuals ---
# Cross-tabulate rhetorical function (rows) against valence (columns), then let
# statsmodels' contingency Table expose the standardized residual of each cell.
contingency_table = pd.crosstab(index=df['Rhetorical_Function'], columns=df['Valence'])
table = sm.stats.Table(contingency_table)
std_residuals_df = table.standardized_resids

# --- 3. Reshape to long form and keep the most extreme cells ---
# One row per (function, valence) cell with its residual.
residuals_long = (
    std_residuals_df
    .stack()
    .reset_index()
    .set_axis(['Rhetorical_Function', 'Valence', 'Residual'], axis=1)
)
# The four largest residuals and the two smallest ones, each tagged with the
# direction label shown in the final table.
top_positive = (
    residuals_long
    .nlargest(4, 'Residual')
    .assign(Direction='Strong positive association')
)
top_negative = (
    residuals_long
    .nsmallest(2, 'Residual')
    .assign(Direction='Strong repulsion')
)

# --- 4. Merge and format the final table ---
print("Formatting the final table...")
final_table = pd.concat([top_positive, top_negative])

# Translate the Arabic labels to English for display.
valence_map = {'إيجابي': 'Positive', 'سلبي': 'Negative', 'محايد': 'Neutral'}
function_map = {
    'امتنان': 'Gratitude (imtinan)', 'وعد': 'Promise (waʿd)',
    'وعيد': 'Warning (waʿīd)', 'تعظيم': 'Glorification (taʿẓīm)'
}
# The dictionaries are intentionally partial. Series.map(dict) would turn any
# label that is missing from them into NaN, so a top residual involving an
# unlisted rhetorical function would lose its name. Fall back to the original
# label whenever no translation exists.
final_table['Valence'] = final_table['Valence'].map(
    lambda label: valence_map.get(label, label)
)
final_table['Rhetorical_Function'] = final_table['Rhetorical_Function'].map(
    lambda label: function_map.get(label, label)
)

# Order the rows: positive associations first (largest residual on top), then
# repulsions from strongest to weakest (most negative residual on top).
positive_sorted = final_table[final_table['Direction'] == 'Strong positive association'].sort_values('Residual', ascending=False)
negative_sorted = final_table[final_table['Direction'] == 'Strong repulsion'].sort_values('Residual', ascending=True)
final_table_sorted = pd.concat([positive_sorted, negative_sorted])

# Render the residuals as signed strings with two decimals (e.g. '+1.23').
final_table_sorted['Residual'] = final_table_sorted['Residual'].map('{:+.2f}'.format)


# --- 5. Display the final table ---
print("\n" + "="*80)
print("--- Top Standardized Residuals for Valence–Rhetorical Function Associations ---")
print("="*80)
# The Residual column holds pre-formatted signed strings. By default tabulate
# (used by to_markdown) re-parses numeric-looking strings as numbers, which
# drops the explicit '+' sign; disable_numparse keeps the strings verbatim.
print(final_table_sorted.to_markdown(index=False, disable_numparse=True))
print("="*80)
print("(Residuals |z| > 1.96 significant at p < .05. Positive values indicate overrepresentation.)")

Cloning into 'Amthal'...
remote: Enumerating objects: 101, done.[K
remote: Counting objects: 100% (101/101), done.[K
remote: Compressing objects: 100% (89/89), done.[K
remote: Total 101 (delta 14), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (101/101), 2.84 MiB | 5.55 MiB/s, done.
Resolving deltas: 100% (14/14), done.
/content/Amthal/Amthal/Amthal
Data loaded successfully. Found 4078 instances.

Formatting the final table...

--- Top Standardized Residuals for Valence–Rhetorical Function Associations ---
| Rhetorical_Function    | Valence   |   Residual | Direction                   |
|:-----------------------|:----------|-----------:|:----------------------------|
| Promise (waʿd)         | Positive  |      27.19 | Strong positive association |
| Gratitude (imtinan)    | Positive  |      20.12 | Strong positive association |
| Glorification (taʿẓīm) | Positive  |      17.88 | Strong positive association |
| Glorification (taʿẓīm) | Negative  |     -23.33 |