In [2]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

## Summarizing and Computing Descriptive Statistics

In [3]:
# Small demo frame with missing values: row "c" is entirely NaN and
# row "a" has no value in column "two".
df = pd.DataFrame(
    {
        "one": [1.4, 7.1, np.nan, 0.75],
        "two": [np.nan, -4.5, np.nan, -1.3],
    },
    index=list("abcd"),
)

df

Unnamed: 0,one,two
a,1.4,
b,7.1,-4.5
c,,
d,0.75,-1.3


In [None]:
# Column-wise sum: one total per column (NaN entries are skipped by default)

df.sum()

one    9.25
two   -5.80
dtype: float64

In [5]:
# Row-wise sum: axis='columns' adds across the columns of each row.
# The all-NaN row "c" sums to 0 because NaN entries are skipped.

df.sum(axis='columns')

a    1.40
b    2.60
c    0.00
d   -0.55
dtype: float64

In [None]:
# skipna=False keeps NaN in the calculation, so any column that contains
# a NaN sums to NaN

df.sum(skipna=False)

one   NaN
two   NaN
dtype: float64

In [7]:
# Same with row-wise sums: skipna=False makes every row that contains
# a NaN sum to NaN

df.sum(axis="columns", skipna=False)

a     NaN
b    2.60
c     NaN
d   -0.55
dtype: float64

In [8]:
# Column means; NaN entries are left out of both the sum and the count
df.mean()

one    3.083333
two   -2.900000
dtype: float64

In [None]:
# Largest value in each column
df.max()

one    7.1
two   -1.3
dtype: float64

In [11]:
# idxmax returns the index label at which each column reaches its maximum

df.idxmax()

one    b
two    d
dtype: object

In [12]:
# Show the frame again for reference
df

Unnamed: 0,one,two
a,1.4,
b,7.1,-4.5
c,,
d,0.75,-1.3


In [14]:
# Cumulative (running) sum down each column; NaN positions stay NaN
# while the running total carries on past them

df.cumsum()

Unnamed: 0,one,two
a,1.4,
b,8.5,-4.5
c,,
d,9.25,-5.8


In [15]:
# describe() returns summary statistics for each numeric column
# (count, mean, std, min, quartiles, max)

df.describe()

Unnamed: 0,one,two
count,3.0,2.0
mean,3.083333,-2.9
std,3.493685,2.262742
min,0.75,-4.5
25%,1.075,-3.7
50%,1.4,-2.9
75%,4.25,-2.1
max,7.1,-1.3


In [16]:
# Sixteen-element string Series: the pattern a, a, b, c repeated four times
obj = pd.Series(list("aabc") * 4)
obj

0     a
1     a
2     b
3     c
4     a
5     a
6     b
7     c
8     a
9     a
10    b
11    c
12    a
13    a
14    b
15    c
dtype: object

In [17]:
# For non-numeric data describe() reports count, unique, top and freq
obj.describe()

count     16
unique     3
top        a
freq       8
dtype: object

## Correlation and Covariance

In [19]:
# Load the price and volume tables from local pickle files.
# NOTE: unpickling can execute arbitrary code, so only load pickle files
# that come from a trusted source.
price = pd.read_pickle("./datasets/yahoo_price.pkl")
volume = pd.read_pickle("./datasets/yahoo_volume.pkl")

In [23]:
# First five rows of the price table (one column per ticker, indexed by date)
price.head()

Unnamed: 0_level_0,AAPL,GOOG,IBM,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-01-04,27.990226,313.062468,113.304536,25.884104
2010-01-05,28.038618,311.683844,111.935822,25.892466
2010-01-06,27.592626,303.826685,111.208683,25.733566
2010-01-07,27.541619,296.753749,110.823732,25.465944
2010-01-08,27.724725,300.709808,111.935822,25.641571


In [24]:
# First five rows of the volume table (same tickers and dates as price)
volume.head()

Unnamed: 0_level_0,AAPL,GOOG,IBM,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-01-04,123432400,3927000,6155300,38409100
2010-01-05,150476200,6031900,6841400,49749600
2010-01-06,138040000,7987100,5605300,58182400
2010-01-07,119282800,12876600,5840600,50559700
2010-01-08,111902700,9483900,4197200,51197400


In [None]:
# pct_change gives the relative change of each price versus the previous row.
# Values are fractions (0.01 means 1%); the first row has no predecessor, so it is NaN.

returns = price.pct_change()
returns.head()

Unnamed: 0_level_0,AAPL,GOOG,IBM,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-01-04,,,,
2010-01-05,0.001729,-0.004404,-0.01208,0.000323
2010-01-06,-0.015906,-0.025209,-0.006496,-0.006137
2010-01-07,-0.001849,-0.02328,-0.003462,-0.0104
2010-01-08,0.006648,0.013331,0.010035,0.006897


In [None]:
# Relationship between the MSFT and IBM return series

# Correlation of the two return columns
msft_ibm_corr = returns['MSFT'].corr(returns['IBM'])
print(f"MSFT va IBM corr: {msft_ibm_corr:.2f}")

# Covariance of the same two columns.
# NOTE(review): with two decimals the covariance prints as 0.00; a wider
# format (e.g. :.6f) would be needed to show its actual magnitude.
msft_ibm_cov = returns['MSFT'].cov(returns['IBM'])
print(f"MSFT va IBM cov: {msft_ibm_cov:.2f}")

MSFT va IBM corr: 0.50
MSFT va IBM cov: 0.00


In [30]:
# Pairwise correlation matrix of all return columns
returns.corr()

Unnamed: 0,AAPL,GOOG,IBM,MSFT
AAPL,1.0,0.407919,0.386817,0.389695
GOOG,0.407919,1.0,0.405099,0.465919
IBM,0.386817,0.405099,1.0,0.499764
MSFT,0.389695,0.465919,0.499764,1.0


In [None]:
# corrwith with a Series: correlate every column of returns with the IBM returns

returns.corrwith(returns['IBM'])

AAPL    0.386817
GOOG    0.405099
IBM     1.000000
MSFT    0.499764
dtype: float64

In [32]:

# corrwith with a DataFrame: columns are matched by name, so each ticker's
# returns are correlated with that same ticker's volume
returns.corrwith(volume)

AAPL   -0.075565
GOOG   -0.007067
IBM    -0.204849
MSFT   -0.092950
dtype: float64

## Unique Values, Value Counts, and Membership

In [33]:
# Short string Series with repeated values, used for the uniqueness demos
obj = pd.Series(list("cadaabbcc"))
obj

0    c
1    a
2    d
3    a
4    a
5    b
6    b
7    c
8    c
dtype: object

In [34]:
# Distinct values of obj, in order of first appearance
uniques = obj.unique()
uniques

array(['c', 'a', 'd', 'b'], dtype=object)

In [36]:
# How many times each distinct value occurs, most frequent first
obj.value_counts()

c    3
a    3
b    2
d    1
Name: count, dtype: int64

In [None]:
# Boolean mask: True where the element of obj is one of 'b' or 'c'

mask = obj.isin(['b', 'c'])
mask

0     True
1    False
2    False
3    False
4    False
5     True
6     True
7     True
8     True
dtype: bool

In [38]:
# Five rows of integer answers, one column per question (Qu1..Qu3)
answer_rows = [
    [1, 2, 1],
    [3, 3, 5],
    [4, 1, 2],
    [3, 2, 4],
    [4, 3, 4],
]
data = pd.DataFrame(answer_rows, columns=["Qu1", "Qu2", "Qu3"])

data

Unnamed: 0,Qu1,Qu2,Qu3
0,1,2,1
1,3,3,5
2,4,1,2
3,3,2,4
4,4,3,4


In [40]:
# Count how often each answer occurs in Qu1, ordered by answer value
# instead of by frequency
data["Qu1"].value_counts().sort_index()

Qu1
1    1
3    2
4    2
Name: count, dtype: int64

In [43]:
# Count the occurrences of every value in each column. A value that never
# appears in a column comes back as NaN after alignment, so fill it with 0.
# pd.Series.value_counts replaces the deprecated top-level pd.value_counts,
# which emits a FutureWarning on recent pandas releases.
result = data.apply(pd.Series.value_counts).fillna(0)
result

  result = data.apply(pd.value_counts).fillna(0)


Unnamed: 0,Qu1,Qu2,Qu3
1,1.0,1.0,1.0
2,0.0,2.0,1.0
3,2.0,2.0,0.0
4,2.0,0.0,2.0
5,0.0,0.0,1.0
