# Kvaliteta življenja v Kibergradu

In [1]:
import pandas as pd
import matplotlib.pyplot as plt

csv_file_path = 'Kibergrad.csv'
df = pd.read_csv(csv_file_path)

In [2]:
df = df.rename(columns={"'TIP'": 'TIP', "'CLANOV'": 'CLANOV', "'OTROK'": 'OTROK', "'DOHODEK'": 'DOHODEK', "'CETRT'": 'CETRT', "'IZOBRAZBA'": 'IZOBRAZBA'})

## Analiza bogatih in revnih

V tabelo bogatih dodamo 1000 najbogatejših družin.

In [3]:
bogati = df[df["DOHODEK"] > df.sort_values("DOHODEK", ascending=False).iloc[1000]["DOHODEK"]]
bogati["DOHODEK"].mean()

156949.927

In [24]:
df

Unnamed: 0,TIP,CLANOV,OTROK,DOHODEK,CETRT,IZOBRAZBA
0,1,2,0,43450,1,39
1,1,2,0,79000,1,40
2,1,2,0,51306,1,39
3,1,4,2,24850,1,41
4,1,4,2,65145,1,43
...,...,...,...,...,...,...
43881,1,5,3,30800,4,36
43882,1,2,0,16684,4,34
43883,1,2,0,12000,4,39
43884,1,2,0,50022,4,34


V tabelo revnih dodamo 1000 najrevnejših družin.

In [4]:
revni = df[df["DOHODEK"] < df.sort_values("DOHODEK", ascending=False).iloc[-1000]["DOHODEK"]]
revni["DOHODEK"].mean()

414.26452905811624

In [5]:
import math
stand_odk_bogati = math.sqrt(bogati["DOHODEK"].var())
stand_odk_revni = math.sqrt(revni["DOHODEK"].var())
stand_odk_bogati

38230.70630577349

In [6]:
stand_odk_revni

1638.8649441243067

In [7]:
revni["DOHODEK"].max()

2190

Sedaj primerjajmo bogate in revne družine po ostalih parametrih in na podlagi tega postavimo hipoteze o korelacijskem koeficientu med določenimi količinami.

In [8]:
revni["IZOBRAZBA"].mean()

37.833667334669336

In [9]:
bogati["IZOBRAZBA"].mean()

42.648

In [10]:
revni["IZOBRAZBA"].var()

7.655354842081354

In [11]:
bogati["IZOBRAZBA"].var()

6.410506506506506

In [12]:
df["IZOBRAZBA"].min()

31

In [13]:
bogati["IZOBRAZBA"].min()

31

In [14]:
bogati["IZOBRAZBA"].max()

46

In [15]:
revni["IZOBRAZBA"].min()

31

In [16]:
revni["IZOBRAZBA"].max()

45

Opazimo, da morata biti izobrazba in dohodek pozitivno korelirana. Sumimo, da je korelacija nekje 0,3 do 0,4.

In [17]:
bogati["OTROK"].mean()

0.579

In [18]:
revni["OTROK"].mean()

1.2735470941883769

In [19]:
bogati["OTROK"].max()

6

In [20]:
revni["OTROK"].max()

8

Iz zgornjega se zdi, da morata biti število otrok in dohodek negativno korelirana. Korelacijski koeficient bi lahko bil nekje -0,2.

## Preizkus hipotez

In [21]:
df["DOHODEK"].corr(df["IZOBRAZBA"])

0.4499440967870731

In [22]:
df["DOHODEK"].corr(df["OTROK"])

-0.08045934385825014

In [23]:
df.describe()

Unnamed: 0,TIP,CLANOV,OTROK,DOHODEK,CETRT,IZOBRAZBA
count,43886.0,43886.0,43886.0,43886.0,43886.0,43886.0
mean,1.430935,3.137288,0.947933,41335.507041,2.52609,39.422504
std,0.793332,1.293711,1.157211,32037.619418,1.078228,3.021883
min,1.0,2.0,0.0,-22166.0,1.0,31.0
25%,1.0,2.0,0.0,18300.0,2.0,39.0
50%,1.0,3.0,1.0,34550.0,3.0,39.0
75%,1.0,4.0,2.0,55827.75,3.0,41.0
max,3.0,25.0,9.0,451887.0,4.0,46.0


In [25]:
df[df["CETRT"].isin([1,2])]

Unnamed: 0,TIP,CLANOV,OTROK,DOHODEK,CETRT,IZOBRAZBA
0,1,2,0,43450,1,39
1,1,2,0,79000,1,40
2,1,2,0,51306,1,39
3,1,4,2,24850,1,41
4,1,4,2,65145,1,43
...,...,...,...,...,...,...
20534,1,2,0,31216,2,39
20535,1,3,0,65002,2,40
20536,1,5,3,70674,2,43
20537,1,3,0,57623,2,40


In [26]:
bogati.describe()

Unnamed: 0,TIP,CLANOV,OTROK,DOHODEK,CETRT,IZOBRAZBA
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,1.067,3.313,0.579,156949.927,2.394,42.648
std,0.332598,1.363411,0.935218,38230.706306,1.192554,2.531898
min,1.0,2.0,0.0,123908.0,1.0,31.0
25%,1.0,2.0,0.0,133199.5,1.0,41.0
50%,1.0,3.0,0.0,144947.5,2.0,43.0
75%,1.0,4.0,1.0,166386.75,4.0,44.0
max,3.0,11.0,6.0,451887.0,4.0,46.0
