In [1]:
import pandas as pd

# Load the dataset; the path is relative to the current working directory.
csv_path = 'Global_AI_Content_Impact_Dataset.csv'
df = pd.read_csv(csv_path)

# Report the frame's shape, then preview its first five rows under a header line.
print("شكل البيانات ", df.shape)
print("\nأول 5 صفوف:", df.head(), sep="\n")

شكل البيانات  (200, 12)

أول 5 صفوف:
       Country  Year    Industry  AI Adoption Rate (%)  \
0  South Korea  2022       Media                 44.29   
1        China  2025       Legal                 34.75   
2          USA  2022  Automotive                 81.06   
3       France  2021       Legal                 85.24   
4       France  2021      Gaming                 78.95   

   AI-Generated Content Volume (TBs per year)  Job Loss Due to AI (%)  \
0                                       33.09                   16.77   
1                                       66.74                   46.89   
2                                       96.13                   10.66   
3                                       93.76                   27.70   
4                                       45.62                   17.45   

   Revenue Increase Due to AI (%)  Human-AI Collaboration Rate (%)  \
0                           46.12                            74.79   
1                           52.46  

In [2]:
# Show column names, non-null counts and dtypes.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly: wrapping it in print() would append a stray "None" line.
print("\nمعلومات عن الأعمدة وأنواع البيانات:")
df.info()


معلومات عن الأعمدة وأنواع البيانات:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 12 columns):
 #   Column                                      Non-Null Count  Dtype  
---  ------                                      --------------  -----  
 0   Country                                     200 non-null    object 
 1   Year                                        200 non-null    int64  
 2   Industry                                    200 non-null    object 
 3   AI Adoption Rate (%)                        200 non-null    float64
 4   AI-Generated Content Volume (TBs per year)  200 non-null    float64
 5   Job Loss Due to AI (%)                      200 non-null    float64
 6   Revenue Increase Due to AI (%)              200 non-null    float64
 7   Human-AI Collaboration Rate (%)             200 non-null    float64
 8   Top AI Tools Used                           200 non-null    object 
 9   Regulation Status                           200 no

In [3]:
# Count the null entries in every column, then print the per-column totals.
missing_per_column = df.isnull().sum()
print("\nعدد القيم الناقصة في كل عمود:")
print(missing_per_column)


عدد القيم الناقصة في كل عمود:
Country                                       0
Year                                          0
Industry                                      0
AI Adoption Rate (%)                          0
AI-Generated Content Volume (TBs per year)    0
Job Loss Due to AI (%)                        0
Revenue Increase Due to AI (%)                0
Human-AI Collaboration Rate (%)               0
Top AI Tools Used                             0
Regulation Status                             0
Consumer Trust in AI (%)                      0
Market Share of AI Companies (%)              0
dtype: int64


In [4]:
# Flag duplicated rows, then sum the boolean flags to get how many there are.
duplicate_flags = df.duplicated()
duplicates_count = duplicate_flags.sum()
print(f"\nعدد الصفوف المكررة: {duplicates_count}")


عدد الصفوف المكررة: 0


In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) as produced by describe().
summary_stats = df.describe()
print("\nالوصف الإحصائي للأعمدة العددية:")
print(summary_stats)


الوصف الإحصائي للأعمدة العددية:
              Year  AI Adoption Rate (%)  \
count   200.000000            200.000000   
mean   2022.315000             54.265850   
std       1.825496             24.218067   
min    2020.000000             10.530000   
25%    2021.000000             33.222500   
50%    2022.000000             53.310000   
75%    2024.000000             76.220000   
max    2025.000000             94.760000   

       AI-Generated Content Volume (TBs per year)  Job Loss Due to AI (%)  \
count                                   200.00000              200.000000   
mean                                     46.07260               25.788250   
std                                      29.16122               13.901105   
min                                       1.04000                0.090000   
25%                                      20.32250               14.995000   
50%                                      44.32000               25.735000   
75%                            

In [7]:
# Distinct values of the regulation column.
regulation_values = df['Regulation Status'].unique()
print("\nالقيم الفريدة في عمود 'Regulation Status':")
print(regulation_values)

# Distinct non-null values of the tools column; only the first ten are shown.
tool_values = df['Top AI Tools Used'].dropna().unique()
print("\nبعض القيم الفريدة في عمود 'Top AI Tools Used':")
print(tool_values[:10])


القيم الفريدة في عمود 'Regulation Status':
['Strict' 'Moderate' 'Lenient']

بعض القيم الفريدة في عمود 'Top AI Tools Used':
['Bard' 'DALL-E' 'Stable Diffusion' 'Claude' 'Midjourney' 'ChatGPT'
 'Synthesia']


In [8]:
# Number of rows recorded for each year, ordered chronologically by the year index.
records_per_year = df['Year'].value_counts().sort_index()
print("\nعدد السجلات لكل سنة:")
print(records_per_year)


عدد السجلات لكل سنة:
Year
2020    47
2021    32
2022    31
2023    29
2024    23
2025    38
Name: count, dtype: int64


In [10]:
import pandas as pd
import plotly.express as px
# Reload the dataset for the plotting section. The path is relative, matching
# the notebook's first load, instead of a Colab-specific absolute '/content/...'
# location, so the cell works wherever the CSV sits in the working directory.
df = pd.read_csv('Global_AI_Content_Impact_Dataset.csv')

In [11]:
# Mean adoption rate for every (year, industry) pair, with the group keys
# restored as ordinary columns so they can be mapped to plot axes.
adoption_by_year_industry = (
    df.groupby(['Year', 'Industry'])['AI Adoption Rate (%)']
    .mean()
    .reset_index()
)

# Line chart of the yearly means, coloured by industry, with point markers enabled.
fig_adoption = px.line(
    adoption_by_year_industry,
    x='Year',
    y='AI Adoption Rate (%)',
    color='Industry',
    markers=True,
    title='تطور معدل تبني الذكاء الاصطناعي حسب القطاع عبر السنوات',
)
fig_adoption.show()


In [13]:
# Average revenue increase per industry, highest first.
revenue_by_industry = (
    df.groupby('Industry')['Revenue Increase Due to AI (%)']
    .mean()
    .sort_values(ascending=False)
    .reset_index()
)

# One bar per industry, labelled with its mean value.
fig_revenue = px.bar(
    revenue_by_industry,
    x='Industry',
    y='Revenue Increase Due to AI (%)',
    title='متوسط زيادة الإيرادات بسبب AI حسب القطاع',
    text='Revenue Increase Due to AI (%)'
)
# The labels are group means, which otherwise render with full float precision;
# the text template limits them to two decimals so they stay readable.
fig_revenue.update_traces(texttemplate='%{text:.2f}', textposition='outside')
fig_revenue.show()

In [14]:
# Row-level records for 2025, largest content volume first (kept for reference).
content_2025 = df[df['Year'] == 2025].sort_values(by='AI-Generated Content Volume (TBs per year)', ascending=False)

# The 2025 slice may hold several rows for the same country. Plotting those
# rows directly against 'Country' stacks one segment (and one label) per row
# and the row-level sort no longer orders the bars. Collapse to a single total
# per country first; the total equals the height the stacked bar would have had.
content_2025_by_country = (
    content_2025
    .groupby('Country', as_index=False)['AI-Generated Content Volume (TBs per year)']
    .sum()
    .sort_values(by='AI-Generated Content Volume (TBs per year)', ascending=False)
)

fig_content = px.bar(
    content_2025_by_country,
    x='Country',
    y='AI-Generated Content Volume (TBs per year)',
    title='حجم المحتوى المولّد بالذكاء الاصطناعي حسب الدولة في 2025',
    text='AI-Generated Content Volume (TBs per year)'
)
# Two-decimal labels hide floating-point noise introduced by the summation.
fig_content.update_traces(texttemplate='%{text:.2f}', textposition='outside')
fig_content.show()

In [15]:
# Display names substituted for the raw column names on the axes.
axis_labels = {
    'AI Adoption Rate (%)': 'معدل تبني AI (%)',
    'Revenue Increase Due to AI (%)': 'زيادة الإيرادات (%)',
}

# Adoption rate against revenue increase, coloured by industry, with an OLS
# trendline requested from Plotly Express.
fig_scatter = px.scatter(
    df,
    x='AI Adoption Rate (%)',
    y='Revenue Increase Due to AI (%)',
    color='Industry',
    trendline='ols',
    labels=axis_labels,
    title='العلاقة بين معدل تبني AI وزيادة الإيرادات',
)
fig_scatter.show()

In [16]:
from collections import Counter

# Split each non-null entry of the tools column on commas into a list of names.
tools_series = df['Top AI Tools Used'].dropna().str.split(',')

# Flatten the per-row lists into one list, trimming surrounding whitespace.
all_tools = []
for tool_list in tools_series:
    all_tools.extend(name.strip() for name in tool_list)

# Tally occurrences and keep the 15 most frequent tools, highest count first.
tools_count = Counter(all_tools)
tools_df = pd.DataFrame(tools_count.items(), columns=['AI Tool', 'Count'])
tools_df = tools_df.sort_values(by='Count', ascending=False).head(15)

# Bar chart of the counts, each bar labelled with its count above it.
fig_tools = px.bar(
    tools_df,
    x='AI Tool',
    y='Count',
    text='Count',
    title='أكثر أدوات AI استخدامًا في البيانات',
)
fig_tools.update_traces(textposition='outside')
fig_tools.show()

In [19]:
import pandas as pd
from sklearn.linear_model import LinearRegression
import numpy as np


# Reload the dataset with a relative path, matching the notebook's first load,
# instead of a Colab-specific absolute '/content/...' location.
df = pd.read_csv('Global_AI_Content_Impact_Dataset.csv')

# Mean adoption rate per (year, industry): one observation per year for each
# industry, which is what the per-industry trend line is fitted on.
df_grouped = df.groupby(['Year', 'Industry'])['AI Adoption Rate (%)'].mean().reset_index()

industries = df_grouped['Industry'].unique()

# Year to extrapolate to; kept in one place so the model input, the result
# rows and the printed heading cannot drift apart.
TARGET_YEAR = 2026
# A straight line is only determined by at least two yearly observations.
MIN_YEARLY_POINTS = 2

predictions = []

for ind in industries:
    df_ind = df_grouped[df_grouped['Industry'] == ind]

    if len(df_ind) < MIN_YEARLY_POINTS:
        # Too few yearly points to estimate a trend: record the industry with
        # a missing prediction rather than reporting a degenerate fit.
        pred_2026 = np.nan
    else:
        # X is the year as a plain number (e.g. 2020 -> 2020), shaped as a
        # single-feature column; y is the matching mean adoption rate.
        X = df_ind['Year'].values.reshape(-1, 1)
        y = df_ind['AI Adoption Rate (%)'].values

        model = LinearRegression()
        model.fit(X, y)

        pred_2026 = model.predict(np.array([[TARGET_YEAR]]))[0]
        # The target is a percentage, so a linear extrapolation that leaves
        # the 0-100 range is bounded back into it.
        pred_2026 = np.clip(pred_2026, 0.0, 100.0)

    predictions.append({'Industry': ind, 'Year': TARGET_YEAR, 'Predicted AI Adoption Rate (%)': pred_2026})

df_pred = pd.DataFrame(predictions)
print(f"توقعات معدل تبني AI لعام {TARGET_YEAR} حسب القطاع:")
print(df_pred)


توقعات معدل تبني AI لعام 2026 حسب القطاع:
        Industry  Year  Predicted AI Adoption Rate (%)
0     Automotive  2026                       76.916889
1      Education  2026                       50.911194
2        Finance  2026                       53.206222
3         Gaming  2026                       55.037644
4     Healthcare  2026                       70.997667
5          Legal  2026                       62.663000
6  Manufacturing  2026                       80.420000
7      Marketing  2026                       61.117778
8          Media  2026                       48.519278
9         Retail  2026                       34.441111
