# All inserts

Generate descriptions of variable regions for all inserts.

In [1]:
import pandas as pd
from itertools import combinations

In [2]:
GC_skew = [0, 0.1, 0.2, 0.4, 0.6]
AT_skew = [0, 0.2, 0.4]
GC_content = [0.3, 0.4, 0.5, 0.6]
G_clustering = [2, 3, 4]

In [3]:
def write_dataframe_to_latex_table(dataframe, output, caption):
    with open(output, 'w') as handle:
        handle.write(dataframe.to_latex(index=False, caption=caption))

Make all combinations of GC skew and content.

In [4]:
GC_skew_content_combos = []

for each_skew in GC_skew:
    for each_content in GC_content:
        GC_skew_content_combos.append((each_skew, 0, each_content))

df = pd.DataFrame(set(GC_skew_content_combos))
df.columns = ['GC Skew', 'AT Skew', 'GC Content']
df

Unnamed: 0,GC Skew,AT Skew,GC Content
0,0.2,0,0.4
1,0.1,0,0.3
2,0.6,0,0.6
3,0.4,0,0.3
4,0.2,0,0.3
5,0.0,0,0.3
6,0.0,0,0.6
7,0.6,0,0.5
8,0.1,0,0.6
9,0.4,0,0.6


Add G clustering inserts.

In [5]:
# all clustered regions have 60% GC content with GC skew of 0.4
df['G Clustering'] = [0 for _ in range(len(df))]
g_clustering_combos = []
for cluster_len in G_clustering:
    for at_skew in AT_skew:
        g_clustering_combos.append(
            [0.4, at_skew, 0.60, cluster_len]
        )
cluster_df = pd.DataFrame(g_clustering_combos)
cluster_df.columns = ['GC Skew', 'AT Skew', 'GC Content', 'G Clustering']
df = df.append(cluster_df)
df

Unnamed: 0,GC Skew,AT Skew,GC Content,G Clustering
0,0.2,0.0,0.4,0
1,0.1,0.0,0.3,0
2,0.6,0.0,0.6,0
3,0.4,0.0,0.3,0
4,0.2,0.0,0.3,0
5,0.0,0.0,0.3,0
6,0.0,0.0,0.6,0
7,0.6,0.0,0.5,0
8,0.1,0.0,0.6,0
9,0.4,0.0,0.6,0


Termination regions are reverse complements of initiation regions.

In [6]:
df['Reverse Complement'] = 0
df['Insert Number'] = range(len(df))
df.loc[(df['GC Skew'].isin([0, 0.2, 0.4])) & (df['GC Content'].isin([0.5, 0.4, 0.3])) | (df['G Clustering'] > 0), 'Reverse Complement'] = 1
df                 

Unnamed: 0,GC Skew,AT Skew,GC Content,G Clustering,Reverse Complement,Insert Number
0,0.2,0.0,0.4,0,1,0
1,0.1,0.0,0.3,0,0,1
2,0.6,0.0,0.6,0,0,2
3,0.4,0.0,0.3,0,1,3
4,0.2,0.0,0.3,0,1,4
5,0.0,0.0,0.3,0,1,5
6,0.0,0.0,0.6,0,0,6
7,0.6,0.0,0.5,0,0,7
8,0.1,0.0,0.6,0,0,8
9,0.4,0.0,0.6,0,0,9


In [7]:
df_caption = ''
write_dataframe_to_latex_table(df, '../tables/syn_VRs.tex', df_caption)

In [8]:
rc_regions = df.loc[(df['Reverse Complement'] == 1)]
rc_regions['GC Skew'] = rc_regions['GC Skew'] * -1
rc_regions['AT Skew'] = rc_regions['AT Skew'] * -1
rc_regions = rc_regions.drop(labels=('Reverse Complement'), axis=1)
rc_regions = rc_regions.rename(
    columns={'G Clustering': 'C Clustering',
            'Insert Number': 'Reverse Complement of Insert'}
    )
rc_regions

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rc_regions['GC Skew'] = rc_regions['GC Skew'] * -1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rc_regions['AT Skew'] = rc_regions['AT Skew'] * -1


Unnamed: 0,GC Skew,AT Skew,GC Content,C Clustering,Reverse Complement of Insert
0,-0.2,-0.0,0.4,0,0
3,-0.4,-0.0,0.3,0,3
4,-0.2,-0.0,0.3,0,4
5,-0.0,-0.0,0.3,0,5
11,-0.0,-0.0,0.5,0,11
13,-0.0,-0.0,0.4,0,13
16,-0.4,-0.0,0.5,0,16
17,-0.2,-0.0,0.5,0,17
19,-0.4,-0.0,0.4,0,19
0,-0.4,-0.0,0.6,2,20


In [9]:
rc_caption = ''
write_dataframe_to_latex_table(rc_regions, '../tables/RC_VRs.tex', rc_caption)

In [10]:
total_regions = pd.DataFrame(
    {
        'Total synthesized inserts': [len(df)],
        'Total contructs': [len(rc_regions) + len(df)]
    }
)
total_regions

Unnamed: 0,Total synthesized inserts,Total contructs
0,29,47


In [11]:
tr_caption = ''
write_dataframe_to_latex_table(total_regions, '../tables/total_VR_counts.tex', tr_caption)