In [1]:
#import all necessary Python libraries
import pandas as pd
import numpy as np
import plotly.graph_objects as go

In [2]:
#read the long-s occurrences from csv into a dataframe
#missing values (NA) are replaced by empty strings so that pre_text/after_text can be compared against ""
#note: the former 'df_slong.set_index('folio')' call was dropped - its result was never assigned, so it had no effect
df_slong = pd.read_csv('M10_slong.csv')
df_slong = df_slong.fillna('')
df_slong.head()

Unnamed: 0,folio,line,type,word,pre_text,after_text
0,143r,N001,#slong,|s|amen,,amen
1,143r,N002,#slong,|s|ein,,ein
2,143r,N003,#slong,vi|s|ch,vi,ch
3,143r,N004,#slong,|s|chueppen,,chueppen
4,143r,N007,#slong,|s|ewt,,ewt


In [3]:
#position of every long s within its word
conditions = [
    (df_slong["pre_text"] == ""),#no text before the s: it stands at the word beginning
    (df_slong["after_text"] == ""),#no text after the s: it stands at the word end
    (df_slong["pre_text"] != "")&(df_slong["after_text"] != "")#text before and after: the s stands in the middle
]
pos_vals = ["initial", "final", "medial"]#labels, in the same order as the conditions
#np.select uses the first condition that is true, so an s without pre_text and without after_text is labelled 'initial'
#the default is passed explicitly as a string so that choices and default share one dtype;
#the three conditions cover every row, so the default label should not appear in the column
df_slong["position"] = np.select(conditions, pos_vals, default="0")#dataframe is extended with column 'position'
df_slong.head()

Unnamed: 0,folio,line,type,word,pre_text,after_text,position
0,143r,N001,#slong,|s|amen,,amen,initial
1,143r,N002,#slong,|s|ein,,ein,initial
2,143r,N003,#slong,vi|s|ch,vi,ch,medial
3,143r,N004,#slong,|s|chueppen,,chueppen,initial
4,143r,N007,#slong,|s|ewt,,ewt,initial


In [4]:
#store the long-s table, now including the 'position' column, as csv
df_slong.to_csv(path_or_buf='M10_slong_pos.csv', encoding="utf-8")

In [5]:
#number of long s per position label
df_slong.loc[:, "position"].value_counts()

initial    345
medial     228
Name: position, dtype: int64

In [6]:
#read the round-s occurrences from csv into a dataframe
#missing values (NA) are replaced by empty strings so that pre_text/after_text can be compared against ""
#note: the former 'df_slong.set_index('folio')' call was dropped - it addressed the long-s dataframe
#and its result was never assigned, so it had no effect
df_snorm = pd.read_csv('M10_snorm.csv')
df_snorm = df_snorm.fillna('')
df_snorm.head()

Unnamed: 0,folio,line,type,word,pre_text,after_text
0,143r,N002,#snorm,al-s-,al,
1,143r,N007,#snorm,da-s-,da,
2,143r,N008,#snorm,e-s-,e,
3,143r,N008,#snorm,e-s-,e,
4,143r,N022,#snorm,wein-s-,wein,


In [7]:
#position of every round s within its word
conditions = [
    (df_snorm["pre_text"] == ""),#no text before the s: it stands at the word beginning
    (df_snorm["after_text"] == ""),#no text after the s: it stands at the word end
    (df_snorm["pre_text"] != "")&(df_snorm["after_text"] != "")#text before and after: the s stands in the middle
]
pos_vals = ["initial", "final", "medial"]#labels, in the same order as the conditions
#np.select uses the first condition that is true, so an s without pre_text and without after_text is labelled 'initial'
#the default is passed explicitly as a string so that choices and default share one dtype;
#the three conditions cover every row, so the default label should not appear in the column
df_snorm["position"] = np.select(conditions, pos_vals, default="0")#dataframe is extended with column 'position'
df_snorm.head()

Unnamed: 0,folio,line,type,word,pre_text,after_text,position
0,143r,N002,#snorm,al-s-,al,,final
1,143r,N007,#snorm,da-s-,da,,final
2,143r,N008,#snorm,e-s-,e,,final
3,143r,N008,#snorm,e-s-,e,,final
4,143r,N022,#snorm,wein-s-,wein,,final


In [8]:
#store the round-s table, now including the 'position' column, as csv
df_snorm.to_csv(path_or_buf='M10_snorm_pos.csv', encoding="utf-8")

In [9]:
#number of round s per position label
df_snorm.loc[:, "position"].value_counts()

final      128
initial      8
medial       1
Name: position, dtype: int64

In [10]:
#visualisation to check for obvious errors
#positions=['initial', 'medial', 'final']

#fig = go.Figure(data=[
    #go.Bar(name='s-long', x=positions, y=[2537, 2535, 15]),
    #go.Bar(name='s-round', x=positions, y=[325, 95, 2166])
#])

# Change the bar mode
#fig.update_layout(barmode='group')
#fig.show()

In [11]:
#find double long s
#a long s is marked as |s| in the word, so a following long s shows up as "|" at the start of after_text
conditions = [
    (df_slong["after_text"].str[0] == "|"),#long s directly followed by another long s - first character of after_text
    (df_slong["after_text"].str[0] != "|")#long s not followed by a long s (control: both groups together must cover every row)
]
double_vals = ["yes", "no"]
#the default is passed explicitly as a string so that choices and default share one dtype;
#the two conditions are complementary, so the default label should not appear in the column
df_slong["doubles"] = np.select(conditions, double_vals, default="0")
df_slong["doubles"].value_counts()

no     519
yes     54
Name: doubles, dtype: int64

In [12]:
#find graph combinations with long s: sch, st, round s
#round s is marked as -s- in the word, so a neighbouring round s shows up as "-" next to the long s
conditions = [
    (df_slong["after_text"].str[0:2] == "ch"),#long s followed by ch - first two characters of after_text
    (df_slong["after_text"].str[0] == "t"),#long s followed by t - first character of after_text
    (df_slong["pre_text"].str[-1] == "-"),#long s preceded by round s - last character of pre_text
    (df_slong["after_text"].str[0] == "-")#long s followed by round s - first character of after_text
]
char_vals = ["ch", "t", "rounds-pre", "rounds-after"]#labels, in the same order as the conditions
#np.select uses the first condition that is true; rows matching none of them get the default
#the default is the string "0" (the label already shown for unmatched rows) so that choices and default share one dtype
df_slong["chars"] = np.select(conditions, char_vals, default="0")
df_slong["chars"].value_counts()

0               385
ch              114
t                72
rounds-after      2
Name: chars, dtype: int64

In [13]:
#double s - round s
#round s is marked as -s- in the word, so a following round s shows up as "-" at the start of after_text
#(the earlier version looked at the first character of pre_text, i.e. the beginning of the word,
#which does not tell whether the current s is followed by another round s)
conditions = [
    (df_snorm["after_text"].str[0] == "-"),#round s directly followed by another round s - first character of after_text
    (df_snorm["after_text"].str[0] != "-")#round s not followed by a round s (control: both groups together must cover every row)
]
double_vals = ["yes", "no"]
#the default is passed explicitly as a string so that choices and default share one dtype;
#the two conditions are complementary, so the default label should not appear in the column
df_snorm["doubles"] = np.select(conditions, double_vals, default="0")
df_snorm["doubles"].value_counts()

no    137
Name: doubles, dtype: int64

In [14]:
#round s: sch, st, rounds-longs
#long s is marked as |s| in the word, so a neighbouring long s shows up as "|" next to the round s
conditions = [
    (df_snorm["after_text"].str[0:2] == "ch"),#round s followed by ch - first two characters of after_text
    (df_snorm["after_text"].str[0] == "t"),#round s followed by t - first character of after_text
    (df_snorm["pre_text"].str[-1] == "|"),#round s preceded by long s - last character of pre_text
    (df_snorm["after_text"].str[0] == "|")#round s followed by long s - first character of after_text
]
char_vals = ["ch", "t", "longs-pre", "longs-after"]#labels, in the same order as the conditions
#np.select uses the first condition that is true; rows matching none of them get the default
#the default is the string "0" (the label already shown for unmatched rows) so that choices and default share one dtype
df_snorm["chars"] = np.select(conditions, char_vals, default="0")
df_snorm["chars"].value_counts()

0            135
longs-pre      2
Name: chars, dtype: int64

In [15]:
#copy of the long-s table with three additional, still empty columns - one per position label
df_slong_pos_counts = df_slong.reindex(
    columns = list(df_slong.columns) + ['initial', 'medial', 'final']
)
df_slong_pos_counts.head()

Unnamed: 0,folio,line,type,word,pre_text,after_text,position,doubles,chars,initial,medial,final
0,143r,N001,#slong,|s|amen,,amen,initial,no,0,,,
1,143r,N002,#slong,|s|ein,,ein,initial,no,0,,,
2,143r,N003,#slong,vi|s|ch,vi,ch,medial,no,ch,,,
3,143r,N004,#slong,|s|chueppen,,chueppen,initial,no,ch,,,
4,143r,N007,#slong,|s|ewt,,ewt,initial,no,0,,,


In [16]:
#count long s positions across the manuscript
#fill each position column with 1 where the row carries that label in 'position' and with 0 everywhere else
for _pos_label in ('initial', 'medial', 'final'):
    _has_label = df_slong_pos_counts['position'] == _pos_label
    df_slong_pos_counts.loc[_has_label, _pos_label] = 1
    df_slong_pos_counts.loc[~_has_label, _pos_label] = 0

#sum the indicator columns per folio - one single-column dataframe per position
df_slong_ini = df_slong_pos_counts.groupby('folio')['initial'].sum().to_frame()
df_slong_med = df_slong_pos_counts.groupby('folio')['medial'].sum().to_frame()
df_slong_fin = df_slong_pos_counts.groupby('folio')['final'].sum().to_frame()

#put the three per-folio sums side by side and write the result to csv
df_slong_pos = pd.concat([df_slong_ini, df_slong_med, df_slong_fin], axis=1)
df_slong_pos.to_csv('M10_slong_pos_counts.csv', encoding = "utf-8")

In [17]:
#count round s positions across the manuscript
#copy of the round-s table with three additional, still empty columns - one per position label
df_snorm_pos_counts = df_snorm.reindex(
    columns = list(df_snorm.columns) + ['initial', 'medial', 'final']
)

#fill each position column with 1 where the row carries that label in 'position' and with 0 everywhere else
for _pos_label in ('initial', 'medial', 'final'):
    _has_label = df_snorm_pos_counts['position'] == _pos_label
    df_snorm_pos_counts.loc[_has_label, _pos_label] = 1
    df_snorm_pos_counts.loc[~_has_label, _pos_label] = 0

#sum the indicator columns per folio - one single-column dataframe per position
df_snorm_ini = df_snorm_pos_counts.groupby('folio')['initial'].sum().to_frame()
df_snorm_med = df_snorm_pos_counts.groupby('folio')['medial'].sum().to_frame()
df_snorm_fin = df_snorm_pos_counts.groupby('folio')['final'].sum().to_frame()

#put the three per-folio sums side by side and write the result to csv
df_snorm_pos = pd.concat([df_snorm_ini, df_snorm_med, df_snorm_fin], axis=1)
df_snorm_pos.to_csv('M10_snorm_pos_counts.csv', encoding = "utf-8")