## 1. Import Relevant Packages

In [1]:
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt

import statsmodels.api as sm

from statsmodels.regression.rolling import RollingOLS

from functools import reduce
import math

## 1. Variables for ANN 

**Market variables** (merged on date)
* market return, cumulative market return, standard deviation

**Stock variables** (merged on date and permno)
* alpha
* beta 
* stock standard deviation (12M)
* (LSTM input file) stock cumulative returns (11M) 
* stock returns (1M)

**Potential additional data to include**
* Idiosyncratic returns
* Yield curve

## 2. Importing and preprocessing data

### CRSP data

In [2]:
# Load the CRSP stock data from disk.
# NOTE: switch the path to the "LSTM file" once the momentum strategy code is discussed.
crsp_m = pd.read_csv(filepath_or_buffer='Input.csv', low_memory=False)

In [3]:
# Parse the 'date' column into pandas datetime values
crsp_m = crsp_m.assign(date=pd.to_datetime(crsp_m['date']))

In [4]:
# Order the observations chronologically and show the result
crsp_m = crsp_m.sort_values('date')
crsp_m

Unnamed: 0,permno,date,cumret,ret
0,10006,1963-01-31,,0.047002
17454,10890,1963-01-31,,0.062222
87001,15472,1963-01-31,,0.031250
203053,23691,1963-01-31,,0.030405
70065,14250,1963-01-31,,0.087379
...,...,...,...,...
96711,16431,2020-12-31,-0.128431,0.087870
492344,57808,2020-12-31,0.044899,0.074638
97407,16432,2020-12-31,-0.169077,0.047025
493412,57817,2020-12-31,-0.132861,0.204089


In [5]:
# Order rows by stock identifier and then by date, and index the panel by date
sorted_crsp = (
    crsp_m
    .sort_values(by=['permno', 'date'])
    .set_index(keys='date')
)

In [6]:
# Preview the first 11 rows of the date-indexed panel
sorted_crsp.head(11)

Unnamed: 0_level_0,permno,cumret,ret
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1963-01-31,10006,,0.047002
1963-02-28,10006,,0.0387
1963-03-31,10006,,-0.009009
1963-04-30,10006,,0.084848
1963-05-31,10006,,0.09162
1963-06-30,10006,,-0.001289
1963-07-31,10006,,-0.045161
1963-08-31,10006,,0.121081
1963-09-30,10006,,-0.07767
1963-10-31,10006,,0.057895


### Fama French data

In [7]:
# Load the Fama-French 3-factor file, keeping only the rows that hold factor values.
ff_3f = pd.read_csv(
    'FF_3.csv',
    skiprows=3,    # skip the first three lines of the file
    nrows=1134,    # hard-coded number of data rows to read
    index_col=0,   # first column becomes the index
)

In [8]:
# Convert the factor values to decimal format by dividing by 100.
# NOTE: re-running this cell divides the already-scaled frame again.
ff_3f = ff_3f.div(100)

In [9]:
# Sanity check: preview the first rows of the rescaled factor table
ff_3f.head()

Unnamed: 0,Mkt-RF,SMB,HML,RF
192607,0.0296,-0.023,-0.0287,0.0022
192608,0.0264,-0.014,0.0419,0.0025
192609,0.0036,-0.0132,0.0001,0.0023
192610,-0.0324,0.0004,0.0051,0.0032
192611,0.0253,-0.002,-0.0035,0.0031


In [10]:
# Parse the YYYYMM index labels as dates and convert them to monthly periods
ff_3f.index = pd.to_datetime(ff_3f.index, format='%Y%m').to_period('M')

In [11]:
# Coerce the two columns used for the market return to numeric dtype;
# values that cannot be parsed become NaN (errors='coerce').
# NOTE(review): the frame was already divided by 100 in an earlier cell, which
# presumably requires numeric columns — confirm this coercion is still needed.
for factor_col in ('RF', 'Mkt-RF'):
    ff_3f[factor_col] = pd.to_numeric(ff_3f[factor_col], errors='coerce')

## 4. Market variables

In [12]:
# Market return = excess market return (Mkt-RF) + risk-free rate (RF)
market = (ff_3f['Mkt-RF'] + ff_3f['RF']).to_frame(name='Mkt')

In [13]:
# 12-month cumulative market return, computed via log returns:
#   1) log(1 + r) per month, 2) rolling 12-month sum (all 12 months required),
#   3) back to a simple return with exp(.) - 1.
market_log = market.assign(Mkt_logret=np.log(1 + market['Mkt']))
market_log['Mkt_cumlog'] = (
    market_log['Mkt_logret']
    .rolling(window=12, min_periods=12)
    .sum()
)

# Convert the summed log return back to a simple (non-log) cumulative return
market['Mkt_cumret'] = np.exp(market_log['Mkt_cumlog']) - 1

In [14]:
# Rolling 12-month standard deviation of the market return (all 12 months required)
market['Mkt_std'] = market['Mkt'].rolling(window=12, min_periods=12).std()

In [15]:
# Preview the first 12 months; the rolling 12-month columns need 12 observations
market.head(12)

Unnamed: 0,Mkt,Mkt_cumret,Mkt_std
1926-07,0.0318,,
1926-08,0.0289,,
1926-09,0.0059,,
1926-10,-0.0292,,
1926-11,0.0284,,
1926-12,0.029,,
1927-01,0.0019,,
1927-02,0.0444,,
1927-03,0.0043,,
1927-04,0.0071,,


In [16]:
# Keep the market variables from January 1963 onwards
input_market = market.loc[market.index >= '1963-01']

In [17]:
# Write the market variables (with their period index) to disk
input_market.to_csv(path_or_buf='input_market.csv', index=True)

## 3. Stock variables 

### 3.1 Return

In [18]:
# Reshape to wide format: one row per date, one column per permno (values: 'ret')
sorted_crsp_p = pd.pivot(sorted_crsp, columns='permno', values=['ret'])

In [19]:
# Pick the 'ret' block out of the pivoted frame (columns are then the permnos)
sorted_ret = sorted_crsp_p.loc[:, 'ret']

In [20]:
# Ensure there are no N/A values *in between* a stock's returns: fill such
# interior gaps with 0, but leave the NaNs before the first and after the last
# observation of each stock untouched.
# A cell lies within a stock's observed span when both the forward-filled and the
# back-filled frame are non-null at that position.
has_obs_up_to_here = sorted_ret.ffill().notna()
has_obs_from_here = sorted_ret.bfill().notna()
interior_gap = sorted_ret.isna() & has_obs_up_to_here & has_obs_from_here

# Build a new frame instead of assigning through a boolean mask into a frame that
# was sliced out of `sorted_crsp_p` (the original did so twice, redundantly).
sorted_ret = sorted_ret.mask(interior_gap, 0)

In [21]:
# Keep a copy that still carries the original dates as index (used in the merge
# section below) BEFORE converting the working frame's index to monthly periods.
m_sorted_ret = sorted_ret.copy(deep=True)

sorted_ret = sorted_ret.set_axis(sorted_ret.index.to_period('M'), axis='index')

In [22]:
# Display the wide return panel (monthly period index, one column per permno)
sorted_ret

permno,10006,10030,10078,10095,10102,10104,10107,10108,10119,10137,...,93152,93174,93179,93223,93246,93295,93312,93422,93429,93436
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1963-01,0.047002,0.056122,,,0.041475,,,,,0.053846,...,,,,,,,,,,
1963-02,0.038700,0.024155,,,-0.004425,,,,,-0.004866,...,,,,,,,,,,
1963-03,-0.009009,0.053774,,,0.047191,,,,,-0.012714,...,,,,,,,,,,
1963-04,0.084848,-0.015837,,,-0.021459,,,,,0.017500,...,,,,,,,,,,
1963-05,0.091620,-0.032184,,,-0.008772,,,,,-0.027518,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-08,,,,,,0.031921,0.102580,,,,...,,0.146067,0.042345,,0.205610,,0.110609,-0.115646,,0.741452
2020-09,,,,,,0.043341,-0.067397,,,,...,,-0.169550,-0.064687,,0.019265,,-0.050220,-0.305538,,-0.139087
2020-10,,,,,,-0.056114,-0.037370,,,,...,,0.011111,0.118610,,0.085261,,-0.021480,-0.003102,,-0.095499
2020-11,,,,,,0.028694,0.060058,,,,...,,0.239698,0.128435,,0.025934,,0.165653,0.788889,,0.462736


In [23]:
# Write the wide monthly return panel to disk
sorted_ret.to_csv(path_or_buf='input_ret.csv', index=True)

### 3.2 Merge

In [24]:
# Move the date index into an ordinary 'date' column and show the result.
# NOTE: running this cell a second time would reset the index again.
m_sorted_ret = m_sorted_ret.reset_index()
m_sorted_ret

permno,date,10006,10030,10078,10095,10102,10104,10107,10108,10119,...,93152,93174,93179,93223,93246,93295,93312,93422,93429,93436
0,1963-01-31,0.047002,0.056122,,,0.041475,,,,,...,,,,,,,,,,
1,1963-02-28,0.038700,0.024155,,,-0.004425,,,,,...,,,,,,,,,,
2,1963-03-31,-0.009009,0.053774,,,0.047191,,,,,...,,,,,,,,,,
3,1963-04-30,0.084848,-0.015837,,,-0.021459,,,,,...,,,,,,,,,,
4,1963-05-31,0.091620,-0.032184,,,-0.008772,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
691,2020-08-31,,,,,,0.031921,0.102580,,,...,,0.146067,0.042345,,0.205610,,0.110609,-0.115646,,0.741452
692,2020-09-30,,,,,,0.043341,-0.067397,,,...,,-0.169550,-0.064687,,0.019265,,-0.050220,-0.305538,,-0.139087
693,2020-10-31,,,,,,-0.056114,-0.037370,,,...,,0.011111,0.118610,,0.085261,,-0.021480,-0.003102,,-0.095499
694,2020-11-30,,,,,,0.028694,0.060058,,,...,,0.239698,0.128435,,0.025934,,0.165653,0.788889,,0.462736


In [25]:
# Revert back to long format (one row per date/permno pair) to match with the 3 factors
sorted_ret_long = m_sorted_ret.melt(id_vars='date', value_name='ret')
sorted_ret_long

Unnamed: 0,date,permno,ret
0,1963-01-31,10006,0.047002
1,1963-02-28,10006,0.038700
2,1963-03-31,10006,-0.009009
3,1963-04-30,10006,0.084848
4,1963-05-31,10006,0.091620
...,...,...,...
1809595,2020-08-31,93436,0.741452
1809596,2020-09-30,93436,-0.139087
1809597,2020-10-31,93436,-0.095499
1809598,2020-11-30,93436,0.462736


In [26]:
# Index the long panel by calendar month while keeping 'date' as a regular column
sorted_ret_long['date'] = pd.to_datetime(sorted_ret_long['date'])
sorted_ret_long = sorted_ret_long.set_index('date', drop=False)
sorted_ret_long.index = sorted_ret_long.index.to_period('M')

# Drop the (date, permno) rows without a return: these are the months in which a
# permno was not trading, introduced as NaN by the wide format.
sorted_ret_long = sorted_ret_long.dropna(subset=['ret'])

In [27]:
# Attach the factor values to every stock-month by matching on the monthly index;
# the inner join keeps only months present in both tables.
merged = sorted_ret_long.merge(
    ff_3f,
    how='inner',
    left_index=True,
    right_index=True,
)
merged

Unnamed: 0,date,permno,ret,Mkt-RF,SMB,HML,RF
1963-01,1963-01-31,10006,0.047002,0.0493,0.0307,0.0222,0.0025
1963-01,1963-01-31,10030,0.056122,0.0493,0.0307,0.0222,0.0025
1963-01,1963-01-31,10102,0.041475,0.0493,0.0307,0.0222,0.0025
1963-01,1963-01-31,10137,0.053846,0.0493,0.0307,0.0222,0.0025
1963-01,1963-01-31,10145,0.025424,0.0493,0.0307,0.0222,0.0025
...,...,...,...,...,...,...,...
2020-12,2020-12-31,93179,0.038115,0.0463,0.0481,-0.0136,0.0001
2020-12,2020-12-31,93246,0.054777,0.0463,0.0481,-0.0136,0.0001
2020-12,2020-12-31,93312,0.056031,0.0463,0.0481,-0.0136,0.0001
2020-12,2020-12-31,93422,0.484472,0.0463,0.0481,-0.0136,0.0001


In [28]:
# Back to wide format: for each of ret, Mkt-RF and RF, one column per permno
merged = pd.pivot(
    merged,
    columns='permno',
    values=['ret', 'Mkt-RF', 'RF'],
)

### 3.1b. Alpha and beta

In [29]:
# CAPM regression inputs, both in wide format (one column per permno):
#   y = stock return minus risk-free rate, x = market excess return
excess_return = merged['ret'] - merged['RF']
market_return = merged.loc[:, 'Mkt-RF']

In [30]:
# Rolling CAPM regression per stock: excess return on a constant plus the market
# excess return. window=12 with min_nobs=12, so an estimate needs 12 observations
# (NaN values are not included).
# CAPM_dict maps each permno to the frame of rolling parameter estimates.
CAPM_dict = {
    permno: RollingOLS(
        excess_return[permno],
        sm.add_constant(market_return[permno]),
        window=12,
        min_nobs=12,
    ).fit().params
    for permno in excess_return.columns
}

In [31]:
# Combine all rolling estimates into one frame with two-level columns:
# (permno, parameter name), i.e. an intercept and a slope column per stock.
param_columns = {
    (permno, param_name): estimates
    for permno, params in CAPM_dict.items()
    for param_name, estimates in params.items()
}
alpha_beta = pd.DataFrame(param_columns)

alpha_beta

Unnamed: 0_level_0,10006,10006,10030,10030,10078,10078,10095,10095,10102,10102,...,93295,93295,93312,93312,93422,93422,93429,93429,93436,93436
Unnamed: 0_level_1,const,10006,const,10030,const,10078,const,10095,const,10102,...,const,93295,const,93312,const,93422,const,93429,const,93436
1963-01,,,,,,,,,,,...,,,,,,,,,,
1963-02,,,,,,,,,,,...,,,,,,,,,,
1963-03,,,,,,,,,,,...,,,,,,,,,,
1963-04,,,,,,,,,,,...,,,,,,,,,,
1963-05,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-08,,,,,,,,,,,...,,,0.002196,1.574106,-0.040728,7.214049,,,0.197264,2.540669
2020-09,,,,,,,,,,,...,,,-0.004038,1.571528,-0.038941,7.216301,,,0.187527,2.758411
2020-10,,,,,,,,,,,...,,,-0.000907,1.565931,-0.004837,7.141584,,,0.162212,2.876685
2020-11,,,,,,,,,,,...,,,-0.009964,1.495338,0.012096,7.074660,,,0.176745,2.852081


In [32]:
# Beta dataframe: one column per stock holding the rolling slope estimate.
# Each parameter frame has an intercept column ('const') and a slope column that
# is labelled with the stock's own permno, so select the slope by label instead
# of building every column and relying on the later one overwriting the earlier.
beta = pd.DataFrame({
    permno: params[permno]
    for permno, params in CAPM_dict.items()
})


In [33]:
# Example: beta of permno 18200 in January 1964
beta.loc[beta.index == '1964-01', 18200]

1964-01    1.815456
Freq: M, Name: 18200, dtype: float64

In [34]:
# Write the rolling betas to disk
beta.to_csv(path_or_buf='input_beta.csv', index=True)

In [35]:
# Alpha dataframe: the intercept ('const') column of every stock.
# Select by the second-level column label rather than by position (every second
# column), so the result does not depend on the column order of `alpha_beta`.
is_intercept = alpha_beta.columns.get_level_values(1) == 'const'

# Keep the intercept columns and drop the parameter-name level, leaving one
# column per permno.
alpha = alpha_beta.loc[:, is_intercept].droplevel(1, axis=1)

alpha.tail()

Unnamed: 0,10006,10030,10078,10095,10102,10104,10107,10108,10119,10137,...,93152,93174,93179,93223,93246,93295,93312,93422,93429,93436
2020-08,,,,,,0.000761,0.031718,,,,...,,-0.058256,-0.071957,,0.063669,,0.002196,-0.040728,,0.197264
2020-09,,,,,,0.002434,0.026963,,,,...,,-0.059112,-0.073647,,0.068739,,-0.004038,-0.038941,,0.187527
2020-10,,,,,,-0.000688,0.023302,,,,...,,-0.051606,-0.053665,,0.060238,,-0.000907,-0.004837,,0.162212
2020-11,,,,,,-0.003005,0.019965,,,,...,,-0.035008,-0.054819,,0.058314,,-0.009964,0.012096,,0.176745
2020-12,,,,,,0.01033,0.018969,,,,...,,-0.038575,-0.056681,,0.060311,,-0.009374,0.009519,,0.170896


In [43]:
# Write the rolling alphas to disk
alpha.to_csv(path_or_buf='input_alpha.csv', index=True)

### 3.2. Standard deviation

In [44]:
# Rolling 12-month standard deviation of each stock's returns (all 12 months required)
std_12M = sorted_ret.rolling(window=12, min_periods=12).std()
std_12M.iloc[:12]

permno,10006,10030,10078,10095,10102,10104,10107,10108,10119,10137,...,93152,93174,93179,93223,93246,93295,93312,93422,93429,93436
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1963-01,,,,,,,,,,,...,,,,,,,,,,
1963-02,,,,,,,,,,,...,,,,,,,,,,
1963-03,,,,,,,,,,,...,,,,,,,,,,
1963-04,,,,,,,,,,,...,,,,,,,,,,
1963-05,,,,,,,,,,,...,,,,,,,,,,
1963-06,,,,,,,,,,,...,,,,,,,,,,
1963-07,,,,,,,,,,,...,,,,,,,,,,
1963-08,,,,,,,,,,,...,,,,,,,,,,
1963-09,,,,,,,,,,,...,,,,,,,,,,
1963-10,,,,,,,,,,,...,,,,,,,,,,


In [45]:
# Example: first 25 months of the rolling standard deviation for permno 18200
std_12M.loc[:, 18200].head(25)

date
1963-01         NaN
1963-02         NaN
1963-03         NaN
1963-04         NaN
1963-05         NaN
1963-06         NaN
1963-07         NaN
1963-08         NaN
1963-09         NaN
1963-10         NaN
1963-11         NaN
1963-12    0.055392
1964-01    0.064717
1964-02    0.075923
1964-03    0.073410
1964-04    0.088196
1964-05    0.088316
1964-06    0.087285
1964-07    0.086409
1964-08    0.080348
1964-09    0.078832
1964-10    0.077004
1964-11    0.077285
1964-12    0.080106
1965-01    0.076422
Freq: M, Name: 18200, dtype: float64

In [46]:
# Preview the last rows of the rolling standard deviation panel
std_12M.tail()

permno,10006,10030,10078,10095,10102,10104,10107,10108,10119,10137,...,93152,93174,93179,93223,93246,93295,93312,93422,93429,93436
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-08,,,,,,0.044897,0.055553,,,,...,,0.15478,0.124558,,0.113947,,0.122183,0.675236,,0.265988
2020-09,,,,,,0.04376,0.063711,,,,...,,0.160303,0.12233,,0.113105,,0.122109,0.685408,,0.284767
2020-10,,,,,,0.047577,0.067338,,,,...,,0.160268,0.132164,,0.102976,,0.122692,0.68375,,0.298346
2020-11,,,,,,0.047502,0.067362,,,,...,,0.172423,0.139432,,0.102715,,0.123604,0.712304,,0.303445
2020-12,,,,,,0.053866,0.067331,,,,...,,0.172258,0.139411,,0.101664,,0.124064,0.716133,,0.303262


In [47]:
# Write the rolling 12-month standard deviations to disk
std_12M.to_csv(path_or_buf='input_std.csv', index=True)

### 3.3. Cumulative return

In [48]:
# Monthly log returns: log(1 + r)
# NOTE(review): the recorded output shows a warning raised from this line —
# presumably some returns are <= -1, making the log undefined; confirm how
# such observations should be treated.
log_df = np.log(sorted_ret.add(1))
log_df

  log_df = np.log(1+sorted_ret)


permno,10006,10030,10078,10095,10102,10104,10107,10108,10119,10137,...,93152,93174,93179,93223,93246,93295,93312,93422,93429,93436
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1963-01,0.045931,0.054604,,,0.040638,,,,,0.052446,...,,,,,,,,,,
1963-02,0.037970,0.023868,,,-0.004435,,,,,-0.004878,...,,,,,,,,,,
1963-03,-0.009050,0.052378,,,0.046111,,,,,-0.012796,...,,,,,,,,,,
1963-04,0.081440,-0.015964,,,-0.021693,,,,,0.017349,...,,,,,,,,,,
1963-05,0.087663,-0.032713,,,-0.008811,,,,,-0.027904,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-08,,,,,,0.031422,0.097653,,,,...,,0.136336,0.041473,,0.186986,,0.104909,-0.122898,,0.554719
2020-09,,,,,,0.042428,-0.069776,,,,...,,-0.185788,-0.066874,,0.019082,,-0.051525,-0.364618,,-0.149762
2020-10,,,,,,-0.057750,-0.038086,,,,...,,0.011050,0.112087,,0.081821,,-0.021714,-0.003107,,-0.100372
2020-11,,,,,,0.028290,0.058324,,,,...,,0.214868,0.120832,,0.025603,,0.153281,0.581595,,0.380309


In [49]:
# 12-month cumulative return: sum the log returns over a rolling 12-month window
# (all 12 months required), then convert back to a simple return.
cumret_log = log_df.rolling(window=12, min_periods=12).sum()
cumret = np.exp(cumret_log).sub(1)

In [50]:
# Preview the first 30 months of the rolling cumulative returns
cumret.head(30)

permno,10006,10030,10078,10095,10102,10104,10107,10108,10119,10137,...,93152,93174,93179,93223,93246,93295,93312,93422,93429,93436
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1963-01,,,,,,,,,,,...,,,,,,,,,,
1963-02,,,,,,,,,,,...,,,,,,,,,,
1963-03,,,,,,,,,,,...,,,,,,,,,,
1963-04,,,,,,,,,,,...,,,,,,,,,,
1963-05,,,,,,,,,,,...,,,,,,,,,,
1963-06,,,,,,,,,,,...,,,,,,,,,,
1963-07,,,,,,,,,,,...,,,,,,,,,,
1963-08,,,,,,,,,,,...,,,,,,,,,,
1963-09,,,,,,,,,,,...,,,,,,,,,,
1963-10,,,,,,,,,,,...,,,,,,,,,,


In [51]:
# Write the rolling 12-month cumulative returns to disk
cumret.to_csv(path_or_buf='input_ret_cum.csv', index=True)

## 4. Output variable

In [52]:
# Load the file used to build the target, keep the three columns needed, and parse
# 'date' as datetime. Built as one chain so that no column is assigned on a
# column-subset of another frame (which can trigger SettingWithCopyWarning).
y_file = (
    pd.read_csv('holding_df.csv', low_memory=False)
    .loc[:, ['permno', 'date', 'ret']]
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)

In [53]:
# For each date, rank stocks into decile portfolios 1-10 based on 'ret':
# 1 = loser portfolio, 10 = winner portfolio.

# Order rows by date and, within each date, by descending return. A single
# multi-key sort replaces the per-group `groupby(...).apply(sort_values)`.
# Rows without a date are dropped first because the group-wise version
# excluded them as well.
y_file = (
    y_file
    .dropna(subset=['date'])
    .sort_values(['date', 'ret'], ascending=[True, False])
    .reset_index(drop=True)
)

# pd.qcut(..., labels=False) returns bin indicators 0-9 within each date;
# shift them to 1-10 for readability.
y_file['momr'] = 1 + y_file.groupby('date')['ret'].transform(
    lambda ret_on_date: pd.qcut(ret_on_date, 10, labels=False)
)

# Sanity check: average return of each portfolio
y_file.groupby('momr')['ret'].mean()

momr
1.0    -0.120565
2.0    -0.060033
3.0    -0.034866
4.0    -0.016077
5.0     0.000265
6.0     0.016246
7.0     0.033332
8.0     0.053019
9.0     0.079941
10.0    0.149510
Name: ret, dtype: float64

In [54]:
# Store the portfolio rank as an integer column
y_file = y_file.astype({'momr': int})

In [55]:
# Index the ranked observations by date and show the result
y_month = y_file.set_index(keys='date')
y_month

Unnamed: 0_level_0,permno,ret,momr
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1965-01-31,19385,0.316940,10
1965-01-31,12511,0.279605,10
1965-01-31,10890,0.237624,10
1965-01-31,22533,0.232558,10
1965-01-31,14760,0.232274,10
...,...,...,...
2020-12-31,16309,-0.111057,1
2020-12-31,14983,-0.112243,1
2020-12-31,13379,-0.167940,1
2020-12-31,82779,-0.200645,1


In [56]:
# Convert the index to monthly periods and keep observations from January 1965 onwards
y_month = y_month.set_axis(y_month.index.to_period('M'), axis='index')
y_month = y_month.loc[y_month.index >= '1965-01']

In [57]:
# Order the target rows by month, then by permno, and show the result
y_sorted = y_month.sort_values(by=['date', 'permno'])
y_sorted

Unnamed: 0_level_0,permno,ret,momr
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1965-01,10006,0.053459,6
1965-01,10030,0.048421,6
1965-01,10102,0.090487,9
1965-01,10137,0.039823,5
1965-01,10145,0.104116,9
...,...,...,...
2020-12,93096,-0.037881,1
2020-12,93132,0.205307,10
2020-12,93246,0.054777,7
2020-12,93312,0.056031,7


In [58]:
# One-hot encode the portfolio rank (one indicator column per rank).
# dtype=int pins the indicators to 0/1: the default dtype of get_dummies depends
# on the pandas version, and boolean indicators would be written to the CSV
# as True/False.
y = pd.get_dummies(y_sorted['momr'], dtype=int)

In [59]:
# Display the one-hot encoded target
y

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1965-01,0,0,0,0,0,1,0,0,0,0
1965-01,0,0,0,0,0,1,0,0,0,0
1965-01,0,0,0,0,0,0,0,0,1,0
1965-01,0,0,0,0,1,0,0,0,0,0
1965-01,0,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...
2020-12,1,0,0,0,0,0,0,0,0,0
2020-12,0,0,0,0,0,0,0,0,0,1
2020-12,0,0,0,0,0,0,1,0,0,0
2020-12,0,0,0,0,0,0,1,0,0,0


In [60]:
# Write the one-hot encoded target (with its monthly index) to disk
y.to_csv(path_or_buf='y_output.csv', index=True)