## Zaman Serilerinin Temelleri (Time Series Basics)

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime

In [2]:
# Five sample dates in January 2020, five days apart (the 5th through the 25th).
date=[datetime(2020,1,day_of_month) for day_of_month in range(5,26,5)]

In [3]:
# Seed NumPy's legacy global generator so that a fresh "Restart & Run All"
# draws the same values here and in the later np.random.randn cells.
np.random.seed(42)
# Build a Series of five random values whose index is the date list defined above.
ts=pd.Series(np.random.randn(5),index=date)
ts

2020-01-05   -0.725464
2020-01-10    2.243310
2020-01-15   -2.113948
2020-01-20   -2.456400
2020-01-25   -0.931941
dtype: float64

In [4]:
# Inspect the index of ts; the recorded output shows a DatetimeIndex with freq=None.
ts.index

DatetimeIndex(['2020-01-05', '2020-01-10', '2020-01-15', '2020-01-20',
               '2020-01-25'],
              dtype='datetime64[ns]', freq=None)

In [5]:
pd.to_datetime("01/01/2020") 
# pd.to_datetime converts a single date into a timestamp; the recorded output is a Timestamp.

Timestamp('2020-01-01 00:00:00')

In [6]:
dates=pd.to_datetime(
    [datetime(2020,7,5),
     "6th of July, 2020",
     "2020-Jul-7",
     "20200708"])
dates
# Dates written in different formats were converted into a single DatetimeIndex object.

DatetimeIndex(['2020-07-05', '2020-07-06', '2020-07-07', '2020-07-08'], dtype='datetime64[ns]', freq=None)

In [7]:
dates.to_period("D")
# Converted the datetime index into a period index with daily ("D") frequency.

PeriodIndex(['2020-07-05', '2020-07-06', '2020-07-07', '2020-07-08'], dtype='period[D]')

In [8]:
dates - dates[0]
# Subtracting the first date from every entry produces a TimedeltaIndex.

TimedeltaIndex(['0 days', '1 days', '2 days', '3 days'], dtype='timedelta64[ns]', freq=None)

In [9]:
pd.date_range("2020-08-15","2020-09-01") 
# pandas offers several functions for building regular date sequences; here a start and an end date are given.

DatetimeIndex(['2020-08-15', '2020-08-16', '2020-08-17', '2020-08-18',
               '2020-08-19', '2020-08-20', '2020-08-21', '2020-08-22',
               '2020-08-23', '2020-08-24', '2020-08-25', '2020-08-26',
               '2020-08-27', '2020-08-28', '2020-08-29', '2020-08-30',
               '2020-08-31', '2020-09-01'],
              dtype='datetime64[ns]', freq='D')

In [10]:
# Same function, but given a start date and a number of periods; the recorded output has daily frequency.
pd.date_range('2020-07-15', periods=10)

DatetimeIndex(['2020-07-15', '2020-07-16', '2020-07-17', '2020-07-18',
               '2020-07-19', '2020-07-20', '2020-07-21', '2020-07-22',
               '2020-07-23', '2020-07-24'],
              dtype='datetime64[ns]', freq='D')

In [11]:
# Ten consecutive hourly timestamps starting at midnight on 2020-07-15.
# The lowercase alias "h" is used because the uppercase "H" spelling is
# deprecated in recent pandas releases.
pd.date_range("2020-07-15",
              periods=10,
              freq="h")

DatetimeIndex(['2020-07-15 00:00:00', '2020-07-15 01:00:00',
               '2020-07-15 02:00:00', '2020-07-15 03:00:00',
               '2020-07-15 04:00:00', '2020-07-15 05:00:00',
               '2020-07-15 06:00:00', '2020-07-15 07:00:00',
               '2020-07-15 08:00:00', '2020-07-15 09:00:00'],
              dtype='datetime64[ns]', freq='H')

In [12]:
pd.period_range("2020-10", 
                periods=10,
                freq="M")
# The frequency here is "M", i.e. monthly periods.

PeriodIndex(['2020-10', '2020-11', '2020-12', '2021-01', '2021-02', '2021-03',
             '2021-04', '2021-05', '2021-06', '2021-07'],
            dtype='period[M]')

In [13]:
# Eight durations starting at zero and growing in one-hour steps.
# Lowercase "h" replaces the uppercase "H" alias, which recent pandas deprecates.
pd.timedelta_range(0,periods=8,freq="h")

TimedeltaIndex(['0 days 00:00:00', '0 days 01:00:00', '0 days 02:00:00',
                '0 days 03:00:00', '0 days 04:00:00', '0 days 05:00:00',
                '0 days 06:00:00', '0 days 07:00:00'],
               dtype='timedelta64[ns]', freq='H')

In [14]:
# Show ts again before selecting individual elements from it.
ts

2020-01-05   -0.725464
2020-01-10    2.243310
2020-01-15   -2.113948
2020-01-20   -2.456400
2020-01-25   -0.931941
dtype: float64

In [15]:
# Take the second index label of ts; the recorded output shows it is a Timestamp.
stamp=ts.index[1]
stamp

Timestamp('2020-01-10 00:00:00')

In [16]:
# Look up a value in ts using the Timestamp label stored in stamp.
ts[stamp]

2.2433097358515117

In [17]:
# A date string also works as a key; the recorded value matches the 2020-01-25
# entry, so "25.1.2020" was read day-first.
# NOTE(review): day-first strings are ambiguous in general; an ISO form such as
# "2020-01-25" may be safer - confirm against the pandas version in use.
ts["25.1.2020"]

-0.931941044157666

In [18]:
# The compact YYYYMMDD string selects the same 2020-01-25 entry (same recorded value).
ts["20200125"]

-0.931941044157666

In [19]:
# A longer series: 1000 random values on consecutive days starting 2020-01-01.
long_ts=pd.Series(data=np.random.randn(1000),
                  index=pd.date_range(start="1/1/2020",periods=1000))
# Preview only the first five rows.
long_ts.head()


2020-01-01    0.656305
2020-01-02    1.000173
2020-01-03   -0.256255
2020-01-04    0.096813
2020-01-05   -0.517981
Freq: D, dtype: float64

In [21]:
# Passing just a year string selects the entries of that year; only the first five rows are shown.
long_ts["2020"].head()

2020-01-01    0.656305
2020-01-02    1.000173
2020-01-03   -0.256255
2020-01-04    0.096813
2020-01-05   -0.517981
Freq: D, dtype: float64

In [22]:
# Slice from 2022-09-20 through the end of the series, using a datetime object as the start bound.
long_ts[datetime(2022,9,20):]

2022-09-20    2.188051
2022-09-21   -0.032697
2022-09-22    0.849236
2022-09-23   -0.723886
2022-09-24    0.287534
2022-09-25    0.073994
2022-09-26   -1.132905
Freq: D, dtype: float64

In [23]:
# Show ts again before truncating it.
ts

2020-01-05   -0.725464
2020-01-10    2.243310
2020-01-15   -2.113948
2020-01-20   -2.456400
2020-01-25   -0.931941
dtype: float64

In [24]:
# Drop every entry after 2020-01-15; the recorded output keeps the 15th itself.
ts.truncate(after="1/15/20")

2020-01-05   -0.725464
2020-01-10    2.243310
2020-01-15   -2.113948
dtype: float64

In [25]:
date=pd.date_range("1/1/2020",
                   periods=100,
                   freq="W-SUN")
# Weekly frequency anchored on Sundays.
# NOTE(review): this rebinds `date`, which an earlier cell bound to a list of datetime objects.

In [26]:
# Frame of random values: 100 rows indexed by `date`, four columns named A through D.
long_df=pd.DataFrame(
    data=np.random.randn(100,4),
    columns=["A","B","C","D"],
    index=date,
)
long_df

Unnamed: 0,A,B,C,D
2020-01-05,0.722271,-1.200132,0.210991,2.111678
2020-01-12,0.279522,-0.562225,-0.072408,-0.729355
2020-01-19,0.078482,0.702421,-0.027911,0.539779
2020-01-26,-0.791702,-0.041269,0.302507,0.915190
2020-02-02,0.750998,-0.042905,0.661373,-0.714174
...,...,...,...,...
2021-10-31,-0.383309,-0.871797,0.888292,0.658152
2021-11-07,-0.654462,-0.566114,-1.288055,0.965216
2021-11-14,-0.070934,-0.710807,-0.988605,-1.644835
2021-11-21,-0.577140,-0.609960,0.170125,0.983523


In [28]:
# Select the rows that fall in October 2020 via partial-string indexing.
# .loc is used because DataFrame[...] with a date string is a column lookup in
# current pandas; row selection through plain [] was deprecated and later removed.
long_df.loc["2020-10"]

  long_df["2020-10"]


Unnamed: 0,A,B,C,D
2020-10-04,-0.614886,0.077693,0.075524,1.007424
2020-10-11,-1.211895,0.035939,-1.516686,-0.998715
2020-10-18,-0.008162,1.236021,0.768711,0.201159
2020-10-25,0.458592,0.186332,0.051784,0.209372


In [29]:
# Index with a repeated label: 2020-01-02 appears three times in a row.
date=pd.DatetimeIndex(
    ["1/1/2020"]+["1/2/2020"]*3+["1/3/2020"])
# Values 0 through 4, one per index entry.
ts1=pd.Series(np.arange(5),index=date)
ts1

2020-01-01    0
2020-01-02    1
2020-01-02    2
2020-01-02    3
2020-01-03    4
dtype: int32

In [31]:
ts1.index.is_unique 
# False means the index contains repeated labels.

False

In [32]:
# Group the values of ts1 by their index label (level 0), so repeated dates share a group.
group=ts1.groupby(level=0) 

In [34]:
group.count()
# Shows how many times each grouped date occurs.

2020-01-01    1
2020-01-02    3
2020-01-03    1
dtype: int64

In [35]:
# Average of the values within each date group.
group.mean()

2020-01-01    0.0
2020-01-02    2.0
2020-01-03    4.0
dtype: float64