In [1]:
import pandas as pd

In [2]:
# Load the raw freelancer earnings dataset from the project-relative data folder.
df = pd.read_csv(filepath_or_buffer="data/freelancer_earnings_bd.csv")

In [3]:
# Structural overview: column dtypes, non-null counts and memory usage.
print("Общая структура:")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() -- that would append a stray "None" line.
df.info()

# Peek at the first rows to sanity-check the parsed values.
print("\nПервые строки:")
print(df.head())

# List every column name as a plain Python list.
print("\nКолонки в датасете:")
print(df.columns.tolist())

Общая структура:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1950 entries, 0 to 1949
Data columns (total 15 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Freelancer_ID      1950 non-null   int64  
 1   Job_Category       1950 non-null   object 
 2   Platform           1950 non-null   object 
 3   Experience_Level   1950 non-null   object 
 4   Client_Region      1950 non-null   object 
 5   Payment_Method     1950 non-null   object 
 6   Job_Completed      1950 non-null   int64  
 7   Earnings_USD       1950 non-null   int64  
 8   Hourly_Rate        1950 non-null   float64
 9   Job_Success_Rate   1950 non-null   float64
 10  Client_Rating      1950 non-null   float64
 11  Job_Duration_Days  1950 non-null   int64  
 12  Project_Type       1950 non-null   object 
 13  Rehire_Rate        1950 non-null   float64
 14  Marketing_Spend    1950 non-null   int64  
dtypes: float64(4), int64(5), object(6)
memory usage: 228.6+

In [4]:
# Count the missing values in every column of the frame.
missing_per_column = df.isnull().sum()
print("\nПропущенные значения по колонкам:")
print(missing_per_column)


Пропущенные значения по колонкам:
Freelancer_ID        0
Job_Category         0
Platform             0
Experience_Level     0
Client_Region        0
Payment_Method       0
Job_Completed        0
Earnings_USD         0
Hourly_Rate          0
Job_Success_Rate     0
Client_Rating        0
Job_Duration_Days    0
Project_Type         0
Rehire_Rate          0
Marketing_Spend      0
dtype: int64


In [5]:
# Collect the distinct values of the Job_Category column.
job_categories = df["Job_Category"].unique()
print("\nУникальные категории Job_Category:")
print(job_categories)


Уникальные категории Job_Category:
['Web Development' 'App Development' 'Data Entry' 'Digital Marketing'
 'Customer Support' 'Content Writing' 'Graphic Design' 'SEO']


In [6]:
# Mean hourly rate per job category, highest-paying category first.
avg_rate_by_category = (
    df.groupby("Job_Category")["Hourly_Rate"]
    .mean()
    .sort_values(ascending=False)
)
print("\nСредняя ставка по категориям:")
print(avg_rate_by_category.round(decimals=2))


Средняя ставка по категориям:
Job_Category
Content Writing      54.68
SEO                  54.31
Customer Support     54.14
Digital Marketing    54.09
Web Development      51.50
Graphic Design       51.48
App Development      50.45
Data Entry           50.35
Name: Hourly_Rate, dtype: float64


In [7]:
# Mean earnings (USD) per payment method, largest average first.
avg_earnings_by_payment = (
    df.groupby("Payment_Method")["Earnings_USD"]
    .mean()
    .sort_values(ascending=False)
)
print("\nСредний доход по методам оплаты:")
print(avg_earnings_by_payment.round(decimals=2))


Средний доход по методам оплаты:
Payment_Method
Crypto            5139.30
Bank Transfer     5019.96
PayPal            4976.69
Mobile Banking    4923.65
Name: Earnings_USD, dtype: float64


In [8]:
# Mean hourly rate per client region, highest-paying region first.
avg_rate_by_region = (
    df.groupby("Client_Region")["Hourly_Rate"]
    .mean()
    .sort_values(ascending=False)
)
print("\nСредняя ставка по регионам:")
print(avg_rate_by_region.round(decimals=2))


Средняя ставка по регионам:
Client_Region
Middle East    54.51
USA            53.79
Asia           53.29
UK             52.36
Europe         52.36
Australia      52.24
Canada         48.94
Name: Hourly_Rate, dtype: float64


In [9]:
# Mean hourly rate per experience level, highest average first.
avg_rate_by_experience = (
    df.groupby("Experience_Level")["Hourly_Rate"]
    .mean()
    .sort_values(ascending=False)
)
print("\nСредняя ставка по уровню опыта:")
print(avg_rate_by_experience.round(decimals=2))


Средняя ставка по уровню опыта:
Experience_Level
Intermediate    54.43
Expert          52.42
Beginner        50.96
Name: Hourly_Rate, dtype: float64


In [10]:
# Share of "Expert" freelancers whose Job_Completed count is below 100.
experts = df.loc[df["Experience_Level"] == "Expert"]
less_than_100 = experts.loc[experts["Job_Completed"] < 100]

# Ratio of the two row counts, scaled to a percentage.
percentage = less_than_100.shape[0] / experts.shape[0] * 100

print(f"\nПроцент экспертов, выполнивших менее 100 проектов: {percentage:.2f}%")


Процент экспертов, выполнивших менее 100 проектов: 33.85%


In [11]:
# Mean earnings (USD) per platform, largest average first.
avg_earnings_by_platform = (
    df.groupby("Platform")["Earnings_USD"]
    .mean()
    .sort_values(ascending=False)
)
print("\nСредний доход по платформам:")
print(avg_earnings_by_platform.round(decimals=2))


Средний доход по платформам:
Platform
Fiverr           5067.72
Freelancer       5039.27
PeoplePerHour    5030.78
Upwork           5028.96
Toptal           4922.62
Name: Earnings_USD, dtype: float64


In [12]:
# Build a cleaned copy without any row that contains a missing value;
# the result is bound to a new name, so `df` itself is not reassigned.
df_cleaned = df.dropna(axis=0, how="any")

In [13]:
# Persist the cleaned frame (not the raw `df`) so the "_clean" file actually
# reflects the dropna() step; index=False keeps the row index out of the CSV.
df_cleaned.to_csv("data/freelancer_earnings_bd_clean.csv", index=False)