In [1]:
import numpy as np
import pandas as pd

In [2]:
stocks = ['PLW', 'CDR', '11B', 'TEN']
print(pd.Series(data=stocks))

0    PLW
1    CDR
2    11B
3    TEN
dtype: object


In [4]:
stocks = {'PLW': 387.00, 'CDR': 339.5, 'TEN': 349.5, '11B': 391.0}
quotations = pd.Series(data=stocks)

print(quotations)

PLW    387.0
CDR    339.5
TEN    349.5
11B    391.0
dtype: float64


In [5]:
stocks = {'PLW': 387.00, 'CDR': 339.5, 'TEN': 349.5, '11B': 391.0}
quotations = pd.Series(data=stocks)
quotations = quotations.tolist()

print(quotations)

[387.0, 339.5, 349.5, 391.0]


In [6]:
stocks = {'PLW': 387.00, 'CDR': 339.5, 'TEN': 349.5, '11B': 391.0}
quotations = pd.Series(data=stocks)
quotations = pd.DataFrame(quotations, columns=['price'])

print(quotations)

     price
PLW  387.0
CDR  339.5
TEN  349.5
11B  391.0


In [8]:
s = pd.Series(
    data=np.arange(10, 100, 10),
    index=np.arange(101, 110),
    dtype='float',
)

print(s)

101    10.0
102    20.0
103    30.0
104    40.0
105    50.0
106    60.0
107    70.0
108    80.0
109    90.0
dtype: float64


In [9]:
series = pd.Series(['001', '002', '003', '004'], list('abcd'))
series = pd.to_numeric(series)
print(series)

a    1
b    2
c    3
d    4
dtype: int64


In [10]:
series = pd.Series(['001', '002', '003', '004'], list('abcd'))
series = series.astype(int)
print(series)

a    1
b    2
c    3
d    4
dtype: int64


In [11]:
stocks = {'PLW': 387.00, 'CDR': 339.5, 'TEN': 349.5, '11B': 391.0}
quotations = pd.Series(data=stocks)
quotations = quotations.append(pd.Series({'BBT': 25.5, 'F51': 19.2}))

print(quotations)

PLW    387.0
CDR    339.5
TEN    349.5
11B    391.0
BBT     25.5
F51     19.2
dtype: float64


  quotations = quotations.append(pd.Series({'BBT': 25.5, 'F51': 19.2}))


In [12]:
stocks = {
    'PLW': 387.00,
    'CDR': 339.5,
    'TEN': 349.5,
    '11B': 391.0,
    'BBT': 25.5,
    'F51': 19.2,
}
quotations = pd.Series(data=stocks)
quotations = pd.DataFrame(quotations).reset_index()
quotations.columns = ['ticker', 'price']
print(quotations)

  ticker  price
0    PLW  387.0
1    CDR  339.5
2    TEN  349.5
3    11B  391.0
4    BBT   25.5
5    F51   19.2


In [13]:
data_dict = {
    'company': ['Amazon', 'Microsoft', 'Facebook'],
    'price': [2375.00, 178.6, 179.2],
    'ticker': ['AMZN.US', 'MSFT.US', 'FB.US']
}
 
companies = pd.DataFrame(data=data_dict)
print(companies)

     company   price   ticker
0     Amazon  2375.0  AMZN.US
1  Microsoft   178.6  MSFT.US
2   Facebook   179.2    FB.US


In [14]:
data_dict = {
    'company': ['Amazon', 'Microsoft', 'Facebook'],
    'price': [2375.00, 178.6, 179.2],
    'ticker': ['AMZN.US', 'MSFT.US', 'FB.US']
}
 
companies = pd.DataFrame(data=data_dict)
companies = companies.set_index('company')
print(companies)

            price   ticker
company                   
Amazon     2375.0  AMZN.US
Microsoft   178.6  MSFT.US
Facebook    179.2    FB.US


In [15]:
date_range = pd.date_range(start='2020-01-01', periods=31)
print(date_range)

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
               '2020-01-05', '2020-01-06', '2020-01-07', '2020-01-08',
               '2020-01-09', '2020-01-10', '2020-01-11', '2020-01-12',
               '2020-01-13', '2020-01-14', '2020-01-15', '2020-01-16',
               '2020-01-17', '2020-01-18', '2020-01-19', '2020-01-20',
               '2020-01-21', '2020-01-22', '2020-01-23', '2020-01-24',
               '2020-01-25', '2020-01-26', '2020-01-27', '2020-01-28',
               '2020-01-29', '2020-01-30', '2020-01-31'],
              dtype='datetime64[ns]', freq='D')


In [16]:
date_range = pd.date_range(start='2020-01-01', end='2020-01-31')
print(date_range)

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
               '2020-01-05', '2020-01-06', '2020-01-07', '2020-01-08',
               '2020-01-09', '2020-01-10', '2020-01-11', '2020-01-12',
               '2020-01-13', '2020-01-14', '2020-01-15', '2020-01-16',
               '2020-01-17', '2020-01-18', '2020-01-19', '2020-01-20',
               '2020-01-21', '2020-01-22', '2020-01-23', '2020-01-24',
               '2020-01-25', '2020-01-26', '2020-01-27', '2020-01-28',
               '2020-01-29', '2020-01-30', '2020-01-31'],
              dtype='datetime64[ns]', freq='D')


In [17]:
date_range = pd.date_range(start='2020-01-01', periods=52, freq='W-MON')
print(date_range)

DatetimeIndex(['2020-01-06', '2020-01-13', '2020-01-20', '2020-01-27',
               '2020-02-03', '2020-02-10', '2020-02-17', '2020-02-24',
               '2020-03-02', '2020-03-09', '2020-03-16', '2020-03-23',
               '2020-03-30', '2020-04-06', '2020-04-13', '2020-04-20',
               '2020-04-27', '2020-05-04', '2020-05-11', '2020-05-18',
               '2020-05-25', '2020-06-01', '2020-06-08', '2020-06-15',
               '2020-06-22', '2020-06-29', '2020-07-06', '2020-07-13',
               '2020-07-20', '2020-07-27', '2020-08-03', '2020-08-10',
               '2020-08-17', '2020-08-24', '2020-08-31', '2020-09-07',
               '2020-09-14', '2020-09-21', '2020-09-28', '2020-10-05',
               '2020-10-12', '2020-10-19', '2020-10-26', '2020-11-02',
               '2020-11-09', '2020-11-16', '2020-11-23', '2020-11-30',
               '2020-12-07', '2020-12-14', '2020-12-21', '2020-12-28'],
              dtype='datetime64[ns]', freq='W-MON')


In [18]:
date_range = pd.date_range(
    start='2020-01-01', end='2020-12-31', freq='W-MON'
)
print(date_range)

DatetimeIndex(['2020-01-06', '2020-01-13', '2020-01-20', '2020-01-27',
               '2020-02-03', '2020-02-10', '2020-02-17', '2020-02-24',
               '2020-03-02', '2020-03-09', '2020-03-16', '2020-03-23',
               '2020-03-30', '2020-04-06', '2020-04-13', '2020-04-20',
               '2020-04-27', '2020-05-04', '2020-05-11', '2020-05-18',
               '2020-05-25', '2020-06-01', '2020-06-08', '2020-06-15',
               '2020-06-22', '2020-06-29', '2020-07-06', '2020-07-13',
               '2020-07-20', '2020-07-27', '2020-08-03', '2020-08-10',
               '2020-08-17', '2020-08-24', '2020-08-31', '2020-09-07',
               '2020-09-14', '2020-09-21', '2020-09-28', '2020-10-05',
               '2020-10-12', '2020-10-19', '2020-10-26', '2020-11-02',
               '2020-11-09', '2020-11-16', '2020-11-23', '2020-11-30',
               '2020-12-07', '2020-12-14', '2020-12-21', '2020-12-28'],
              dtype='datetime64[ns]', freq='W-MON')


In [19]:
date_range = pd.date_range(start='2021-01-01', periods=24, freq='H')
print(date_range)

DatetimeIndex(['2021-01-01 00:00:00', '2021-01-01 01:00:00',
               '2021-01-01 02:00:00', '2021-01-01 03:00:00',
               '2021-01-01 04:00:00', '2021-01-01 05:00:00',
               '2021-01-01 06:00:00', '2021-01-01 07:00:00',
               '2021-01-01 08:00:00', '2021-01-01 09:00:00',
               '2021-01-01 10:00:00', '2021-01-01 11:00:00',
               '2021-01-01 12:00:00', '2021-01-01 13:00:00',
               '2021-01-01 14:00:00', '2021-01-01 15:00:00',
               '2021-01-01 16:00:00', '2021-01-01 17:00:00',
               '2021-01-01 18:00:00', '2021-01-01 19:00:00',
               '2021-01-01 20:00:00', '2021-01-01 21:00:00',
               '2021-01-01 22:00:00', '2021-01-01 23:00:00'],
              dtype='datetime64[ns]', freq='H')


In [20]:
date_range = pd.date_range(
    start='2021-01-01', end='2021-01-02', freq='H', closed='left'
)
print(date_range)

DatetimeIndex(['2021-01-01 00:00:00', '2021-01-01 01:00:00',
               '2021-01-01 02:00:00', '2021-01-01 03:00:00',
               '2021-01-01 04:00:00', '2021-01-01 05:00:00',
               '2021-01-01 06:00:00', '2021-01-01 07:00:00',
               '2021-01-01 08:00:00', '2021-01-01 09:00:00',
               '2021-01-01 10:00:00', '2021-01-01 11:00:00',
               '2021-01-01 12:00:00', '2021-01-01 13:00:00',
               '2021-01-01 14:00:00', '2021-01-01 15:00:00',
               '2021-01-01 16:00:00', '2021-01-01 17:00:00',
               '2021-01-01 18:00:00', '2021-01-01 19:00:00',
               '2021-01-01 20:00:00', '2021-01-01 21:00:00',
               '2021-01-01 22:00:00', '2021-01-01 23:00:00'],
              dtype='datetime64[ns]', freq='H')


  date_range = pd.date_range(


In [21]:
date_range = pd.date_range(start='2021-03-01', periods=31)
df = pd.DataFrame(data=date_range, columns=['day'])
df['day_of_year'] = df['day'].dt.dayofyear
print(df)

          day  day_of_year
0  2021-03-01           60
1  2021-03-02           61
2  2021-03-03           62
3  2021-03-04           63
4  2021-03-05           64
5  2021-03-06           65
6  2021-03-07           66
7  2021-03-08           67
8  2021-03-09           68
9  2021-03-10           69
10 2021-03-11           70
11 2021-03-12           71
12 2021-03-13           72
13 2021-03-14           73
14 2021-03-15           74
15 2021-03-16           75
16 2021-03-17           76
17 2021-03-18           77
18 2021-03-19           78
19 2021-03-20           79
20 2021-03-21           80
21 2021-03-22           81
22 2021-03-23           82
23 2021-03-24           83
24 2021-03-25           84
25 2021-03-26           85
26 2021-03-27           86
27 2021-03-28           87
28 2021-03-29           88
29 2021-03-30           89
30 2021-03-31           90


In [22]:
np.random.seed(42)
data_dict = {
    'normal': np.random.normal(loc=0, scale=1, size=1000),
    'uniform': np.random.uniform(low=0, high=1, size=1000),
    'binomial': np.random.binomial(n=1, p=0.2, size=1000),
}
 
df = pd.DataFrame(
    data=data_dict, index=pd.date_range('2020-01-01', periods=1000)
)
print(df)

              normal   uniform  binomial
2020-01-01  0.496714  0.167483         0
2020-01-02 -0.138264  0.104568         0
2020-01-03  0.647689  0.636430         0
2020-01-04  1.523030  0.706476         0
2020-01-05 -0.234153  0.031586         1
...              ...       ...       ...
2022-09-22 -0.281100  0.280683         1
2022-09-23  1.797687  0.206993         0
2022-09-24  0.640843  0.516573         0
2022-09-25 -0.571179  0.005506         1
2022-09-26  0.572583  0.007664         0

[1000 rows x 3 columns]


In [23]:
np.random.seed(42)
data_dict = {
    'normal': np.random.normal(loc=0, scale=1, size=1000),
    'uniform': np.random.uniform(low=0, high=1, size=1000),
    'binomial': np.random.binomial(n=1, p=0.2, size=1000),
}
 
df = pd.DataFrame(
    data=data_dict, index=pd.date_range('2020-01-01', periods=1000)
)
print(df.head(10))
print()
print(df.tail())

              normal   uniform  binomial
2020-01-01  0.496714  0.167483         0
2020-01-02 -0.138264  0.104568         0
2020-01-03  0.647689  0.636430         0
2020-01-04  1.523030  0.706476         0
2020-01-05 -0.234153  0.031586         1
2020-01-06 -0.234137  0.936212         0
2020-01-07  1.579213  0.051971         0
2020-01-08  0.767435  0.541296         0
2020-01-09 -0.469474  0.709061         1
2020-01-10  0.542560  0.870969         0

              normal   uniform  binomial
2022-09-22 -0.281100  0.280683         1
2022-09-23  1.797687  0.206993         0
2022-09-24  0.640843  0.516573         0
2022-09-25 -0.571179  0.005506         1
2022-09-26  0.572583  0.007664         0


In [24]:
np.random.seed(42)
data_dict = {
    'normal': np.random.normal(loc=0, scale=1, size=1000),
    'uniform': np.random.uniform(low=0, high=1, size=1000),
    'binomial': np.random.binomial(n=1, p=0.2, size=1000),
}
 
df = pd.DataFrame(
    data=data_dict, index=pd.date_range('2020-01-01', periods=1000)
)
print(df.info())
print()
print(df.describe())

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1000 entries, 2020-01-01 to 2022-09-26
Freq: D
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   normal    1000 non-null   float64
 1   uniform   1000 non-null   float64
 2   binomial  1000 non-null   int64  
dtypes: float64(2), int64(1)
memory usage: 31.2 KB
None

            normal      uniform    binomial
count  1000.000000  1000.000000  1000.00000
mean      0.019332     0.503649     0.19200
std       0.979216     0.288357     0.39407
min      -3.241267     0.003218     0.00000
25%      -0.647590     0.247199     0.00000
50%       0.025301     0.516126     0.00000
75%       0.647944     0.746319     0.00000
max       3.852731     0.999414     1.00000


In [25]:
np.random.seed(42)
data_dict = {
    'normal': np.random.normal(loc=0, scale=1, size=1000),
    'uniform': np.random.uniform(low=0, high=1, size=1000),
    'binomial': np.random.binomial(n=1, p=0.2, size=1000),
}
 
df = pd.DataFrame(
    data=data_dict, index=pd.date_range('2020-01-01', periods=1000)
)
print(df['binomial'].value_counts())

0    808
1    192
Name: binomial, dtype: int64
