In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import pandas as pd
import csv
import numpy as np
import matplotlib.pyplot as plt

import os
import glob
import re

In [2]:
# read in original data

path = '/Users/danielle/Documents/thesis/subsampled_analysis/original_resampling/*'                     
all_files = [name for name in glob.glob(path)]   

In [3]:
df_from_each_file = []
unclassified_list = []

for f in all_files:
  
    df = pd.read_csv(f, sep = '\t') # read in dataframe 
    
    id_1 = f.split('original_resampling/')[1] # add sample id from filename 
    id_2 = re.split('(_S)', id_1)[0]
    df["sampleid"] = id_2
    df["sampleid"]= df["sampleid"].str.replace("_", "-") 

    df.rename(columns = {'#SampleID':'taxa', 'Metaphlan2_Analysis':'abundance'}, inplace = True) 

    df = df[df['taxa'].str.contains("\|g__")] # keep genera
    df = df[~df['taxa'].str.contains("\|s__")] # keep species
    df["taxa"] = df['taxa'].str.split("\|g__").str[-1]
    df["taxa"] = df['taxa'].str.split("\|s__").str[0]# percent unclassified organisms profiled with mgx
    unclassified = sum(df.taxa.str.contains("_unclassified| noname | Candidatus"))/df.shape[0]
    unclassified_list.append(unclassified)

    # remove unclassified
    df = df[~df.taxa.str.contains("_unclassified")]
    df = df[~df.taxa.str.contains("_noname")]
    df = df[~df.taxa.str.contains("Candidatus")]

    # combine together taxa of the same genera
    df = df.groupby(['taxa', 'sampleid'])['abundance'].sum().reset_index()    

    # convert to relative abundance
    df["abundance"] = df["abundance"]/100.0

    df_from_each_file.append(df)

In [4]:
# averaging mean unclassified across all dataframes
np.mean(unclassified_list)*100

0.6376387867260078

In [5]:
original = pd.concat(df_from_each_file, ignore_index=True) # concat all dataframes together

In [6]:
original_reshaped = original.pivot_table(index = "sampleid", values="abundance", columns = "taxa") # pivot

In [7]:
original_reshaped.head()

taxa,Acidaminococcus,Acinetobacter,Actinobacillus,Actinomyces,Adlercreutzia,Akkermansia,Alistipes,Alloprevotella,Anaerococcus,Anaerofustis,...,Ruminococcus,Shigella,Slackia,Staphylococcus,Streptococcus,Subdoligranulum,Sutterella,Turicibacter,Varibaculum,Veillonella
sampleid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
C0005-3F-1A,,,,,0.000395,,0.059734,,0.000243,,...,0.063788,,,,0.001462,0.038609,0.007346,0.000521,,
C0016-3F-1A,,,,,0.000187,0.0025,0.049752,,,,...,0.048786,,,,0.009097,0.06576,0.008949,,,0.00043
C0017-2F-1A,,,,,,0.024144,0.079612,,,,...,0.011487,,,,0.001998,0.022731,0.019241,,,0.000475
C0029-6F-1A,,,,0.000123,0.002727,0.023825,0.117251,,,,...,0.055014,,,,,0.049503,,,,
C0032-9F-1A,,,,,0.002953,,0.10397,,,,...,0.255394,,,,0.043516,0.080632,,,,


In [8]:
original_reshaped.columns.name = None

In [9]:
original_reshaped = original_reshaped.rename_axis(None, axis=1).reset_index()

In [10]:
original_reshaped = original_reshaped.fillna(0)  # fill in missing values with 0

In [11]:
original_reshaped["uid"] = original_reshaped["sampleid"].astype(str)+'-original'# add unique identifier

In [12]:
original_reshaped.head()

Unnamed: 0,sampleid,Acidaminococcus,Acinetobacter,Actinobacillus,Actinomyces,Adlercreutzia,Akkermansia,Alistipes,Alloprevotella,Anaerococcus,...,Shigella,Slackia,Staphylococcus,Streptococcus,Subdoligranulum,Sutterella,Turicibacter,Varibaculum,Veillonella,uid
0,C0005-3F-1A,0.0,0.0,0.0,0.0,0.000395,0.0,0.059734,0.0,0.000243,...,0.0,0.0,0.0,0.001462,0.038609,0.007346,0.000521,0.0,0.0,C0005-3F-1A-original
1,C0016-3F-1A,0.0,0.0,0.0,0.0,0.000187,0.0025,0.049752,0.0,0.0,...,0.0,0.0,0.0,0.009097,0.06576,0.008949,0.0,0.0,0.00043,C0016-3F-1A-original
2,C0017-2F-1A,0.0,0.0,0.0,0.0,0.0,0.024144,0.079612,0.0,0.0,...,0.0,0.0,0.0,0.001998,0.022731,0.019241,0.0,0.0,0.000475,C0017-2F-1A-original
3,C0029-6F-1A,0.0,0.0,0.0,0.000123,0.002727,0.023825,0.117251,0.0,0.0,...,0.0,0.0,0.0,0.0,0.049503,0.0,0.0,0.0,0.0,C0029-6F-1A-original
4,C0032-9F-1A,0.0,0.0,0.0,0.0,0.002953,0.0,0.10397,0.0,0.0,...,0.0,0.0,0.0,0.043516,0.080632,0.0,0.0,0.0,0.0,C0032-9F-1A-original


In [13]:
# read in subsampled data

subsampled_path = '/Users/danielle/Documents/thesis/subsampled_analysis/subsample_2_profiles/*'                     
subsampled_files = [name for name in glob.glob(subsampled_path)]   

In [14]:
df_from_subsampled_file = []
unclassified_list = []

for f in subsampled_files:
    df = pd.read_csv(f, sep = '\t', skiprows=[0,1,2], usecols = ['#clade_name','relative_abundance']) # read in dataframe 
    
    # if row names == 5, end loop
    if df.shape[0] < 5:
        pass
    
    else:
        id_1 = f.split('profiles/')[1] # add sample id from filename 
        df["sampleid"] = re.split('(_\d+k)', id_1)[0]
        df["sampleid"]= df["sampleid"].str.replace("_", "-") 
        df["uid"] = id_1.split('_profile.tsv')[0]

        df.rename(columns = {'#clade_name':'taxa', 'relative_abundance':'abundance'}, inplace = True) 

        df = df[df['taxa'].str.contains("\|g__")] # keep genera
        df = df[~df['taxa'].str.contains("\|s__")] # keep species
        df["taxa"] = df['taxa'].str.split("\|g__").str[-1]
        df["taxa"] = df['taxa'].str.split("\|s__").str[0]

        # percent unclassified organisms profiled with mgx
        unclassified = sum(df.taxa.str.contains("_unclassified| noname | Candidatus"))/df.shape[0]
        unclassified_list.append(unclassified)

        # remove unclassified
        df = df[~df.taxa.str.contains("_unclassified")]
        df = df[~df.taxa.str.contains("_noname")]
        df = df[~df.taxa.str.contains("Candidatus")]

        # combine together taxa of the same genera
        df = df.groupby(['taxa', 'sampleid', 'uid'])['abundance'].sum().reset_index()    

        # convert to relative abundance
        df["abundance"] = df["abundance"]/100.0

        df_from_subsampled_file.append(df)

In [15]:
subsample = pd.concat(df_from_subsampled_file, ignore_index=True) # concat all subsampled dataframes together

In [16]:
subsample_reshaped = subsample.pivot_table(index = ["sampleid", "uid"], values="abundance", columns = "taxa") # pivot

In [17]:
subsample_reshaped.columns.name = None

In [18]:
subsample_reshaped = subsample_reshaped.rename_axis(None, axis=1).reset_index()

In [19]:
subsample_reshaped = subsample_reshaped.fillna(0)  # fill in missing values with 0

### Melt dataframes

In [20]:
subsample_reshaped["replicate"] = subsample_reshaped['uid'].str.split("k_").str[-1]
subsample_reshaped["replicate"] = subsample_reshaped['replicate'].str.split("_profile").str[0]

In [21]:
subsample_reshaped["read_depth"] = subsample_reshaped['uid'].str.split("1A_").str[-1]
subsample_reshaped["read_depth"] = subsample_reshaped['read_depth'].str.split("k_").str[0]

In [22]:
subsample_reshaped.head(10)

Unnamed: 0,sampleid,uid,Actinomyces,Adlercreutzia,Agathobaculum,Akkermansia,Alistipes,Anaerostipes,Anaerotruncus,Asaccharobacter,...,Sellimonas,Slackia,Streptococcus,Turicibacter,Turicimonas,Tyzzerella,Veillonella,Victivallis,replicate,read_depth
0,C0005-3F-1A,C0005_3F_1A_1000k_1,0.0,0.0,0.0,0.0,0.073936,0.0,0.0,0.0,...,0.0,0.0,0.0,0.001235,0.0,0.0,0.0,0.0,1,1000
1,C0005-3F-1A,C0005_3F_1A_1000k_2,0.0,0.0,0.0,0.0,0.079473,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2,1000
2,C0005-3F-1A,C0005_3F_1A_1000k_3,0.0,0.0,0.0,0.0,0.080825,0.0,0.0,0.0,...,0.0,0.0,0.0,0.001753,0.0,0.0,0.0,0.0,3,1000
3,C0005-3F-1A,C0005_3F_1A_1000k_4,0.0,0.0,0.0,0.0,0.084085,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000908,0.0,0.0,0.0,0.0,4,1000
4,C0005-3F-1A,C0005_3F_1A_100k_1,0.0,0.0,0.0,0.0,0.029034,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,100
5,C0005-3F-1A,C0005_3F_1A_100k_2,0.0,0.0,0.0,0.0,0.080409,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2,100
6,C0005-3F-1A,C0005_3F_1A_100k_3,0.0,0.0,0.0,0.0,0.104999,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3,100
7,C0005-3F-1A,C0005_3F_1A_100k_4,0.0,0.0,0.0,0.0,0.070625,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,100
8,C0005-3F-1A,C0005_3F_1A_250k_1,0.0,0.0,0.0,0.0,0.080921,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,250
9,C0005-3F-1A,C0005_3F_1A_250k_2,0.0,0.0,0.0,0.0,0.087144,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2,250


In [23]:
subsample_reshaped.head(10).to_csv('subsample_cleaned.csv', index=False)

In [24]:
# adding age metadata
age = pd.read_csv("~/Documents/thesis/theoretical/sorted_babies.csv", index_col = 0)

In [25]:
age.head()

Unnamed: 0,sample,subject,timepoint,correctedAgeDays,shannon,reads,AgeMonths,dev_stage,color
1,C0005_3F_1A,5,3,4505.0,3.379592,6193602.0,150.166667,older than 30 months,yellow
2,C0016_3F_1A,16,3,2987.0,3.162439,7626286.0,99.566667,older than 30 months,yellow
3,C0016_4F_1A,16,4,3398.0,3.415994,5497174.0,113.266667,older than 30 months,yellow
4,C0017_2F_1A,17,2,3836.0,3.102341,6240254.0,127.866667,older than 30 months,yellow
5,C0017_3F_1A,17,3,4247.0,3.460197,7764432.0,141.566667,older than 30 months,yellow


In [26]:
# remove shannon column, change characters in sample names 
age.drop(columns=['shannon'], inplace= True)
age["sample"] = age["sample"].str.replace("_",'-')
age.rename(columns = {'sample':'sampleid', "reads":"read_depth"}, inplace = True) 

In [27]:
age.head(15)

Unnamed: 0,sampleid,subject,timepoint,correctedAgeDays,read_depth,AgeMonths,dev_stage,color
1,C0005-3F-1A,5,3,4505.0,6193602.0,150.166667,older than 30 months,yellow
2,C0016-3F-1A,16,3,2987.0,7626286.0,99.566667,older than 30 months,yellow
3,C0016-4F-1A,16,4,3398.0,5497174.0,113.266667,older than 30 months,yellow
4,C0017-2F-1A,17,2,3836.0,6240254.0,127.866667,older than 30 months,yellow
5,C0017-3F-1A,17,3,4247.0,7764432.0,141.566667,older than 30 months,yellow
6,C0029-6F-1A,29,6,3478.0,5641016.0,115.933333,older than 30 months,yellow
7,C0032-9F-1A,32,9,3084.0,7575450.0,102.8,older than 30 months,yellow
8,C0043-7F-1A,43,7,2598.0,3498886.0,86.6,older than 30 months,yellow
9,C0043-8F-1A,43,8,3059.0,8846224.0,101.966667,older than 30 months,yellow
10,C0047-7F-1A,47,7,2917.0,6353788.0,97.233333,older than 30 months,yellow


In [28]:
# make age dictionary
agedict = {str(s): {} for s in age["sampleid"]}
for index, row in age.iterrows():
    age_months = row["AgeMonths"]
    agedict[row["sampleid"]]= age_months

In [29]:
# read_depth dictionary
readdict = {str(s): {} for s in age["sampleid"]}
for index, row in age.iterrows():
    reads = row["read_depth"]
    readdict[row["sampleid"]]= reads

In [30]:
# dev_stage dictionary
dev_stage_dict = {str(s): {} for s in age["dev_stage"]}
for index, row in age.iterrows():
    stage = row["dev_stage"]
    dev_stage_dict[row["sampleid"]] = stage

In [31]:
original_reshaped["AgeMonths"]= original_reshaped["sampleid"].map(agedict)
original_reshaped["read_depth"]= original_reshaped["sampleid"].map(readdict)
original_reshaped["dev_stage"]= original_reshaped["sampleid"].map(dev_stage_dict)
original_reshaped["sampling_cat"] = "original depth"
original_reshaped["replicate"] = 1

In [41]:
# finding mean/std sequencing depth for kids that we subsampled from
original_reshaped["read_depth"].values.mean()
original_reshaped["read_depth"].values.std()

7209871.4

2562647.2050451343

In [33]:
original_reshaped.head(15)

Unnamed: 0,sampleid,Acidaminococcus,Acinetobacter,Actinobacillus,Actinomyces,Adlercreutzia,Akkermansia,Alistipes,Alloprevotella,Anaerococcus,...,Sutterella,Turicibacter,Varibaculum,Veillonella,uid,AgeMonths,read_depth,dev_stage,sampling_cat,replicate
0,C0005-3F-1A,0.0,0.0,0.0,0.0,0.000395,0.0,0.059734,0.0,0.000243,...,0.007346,0.000521,0.0,0.0,C0005-3F-1A-original,150.166667,6193602.0,older than 30 months,original depth,1
1,C0016-3F-1A,0.0,0.0,0.0,0.0,0.000187,0.0025,0.049752,0.0,0.0,...,0.008949,0.0,0.0,0.00043,C0016-3F-1A-original,99.566667,7626286.0,older than 30 months,original depth,1
2,C0017-2F-1A,0.0,0.0,0.0,0.0,0.0,0.024144,0.079612,0.0,0.0,...,0.019241,0.0,0.0,0.000475,C0017-2F-1A-original,127.866667,6240254.0,older than 30 months,original depth,1
3,C0029-6F-1A,0.0,0.0,0.0,0.000123,0.002727,0.023825,0.117251,0.0,0.0,...,0.0,0.0,0.0,0.0,C0029-6F-1A-original,115.933333,5641016.0,older than 30 months,original depth,1
4,C0032-9F-1A,0.0,0.0,0.0,0.0,0.002953,0.0,0.10397,0.0,0.0,...,0.0,0.0,0.0,0.0,C0032-9F-1A-original,102.8,7575450.0,older than 30 months,original depth,1
5,C0043-7F-1A,0.0,0.0,0.0,0.0,0.000359,0.003434,0.019489,0.0,0.0,...,0.0,0.0,0.0,0.003807,C0043-7F-1A-original,86.6,3498886.0,older than 30 months,original depth,1
6,C0047-7F-1A,0.0,0.0,0.0,0.0,0.000337,0.001211,0.028321,0.0,0.0,...,0.010724,0.0,0.0,0.0,C0047-7F-1A-original,97.233333,6353788.0,older than 30 months,original depth,1
7,C0052-5F-1A,0.0,0.0,0.0,0.0,0.00045,7.6e-05,0.024938,0.0,0.0,...,0.0,0.0,0.0,0.000419,C0052-5F-1A-original,94.133333,7582006.0,older than 30 months,original depth,1
8,C0053-6F-1A,0.0,0.0,0.0,0.0,0.000239,0.0,0.000207,0.0,0.0,...,0.0,0.0,0.0,0.012215,C0053-6F-1A-original,72.5,7101552.0,older than 30 months,original depth,1
9,C0055-3F-1A,0.0,0.0,0.0,0.0,0.002635,0.0,0.133723,0.0,0.0,...,0.0,0.0,0.0,0.0,C0055-3F-1A-original,114.733333,4922724.0,older than 30 months,original depth,1


In [34]:
subsample_reshaped["AgeMonths"]= subsample_reshaped["sampleid"].map(agedict)
subsample_reshaped["dev_stage"]= subsample_reshaped["sampleid"].map(dev_stage_dict)
subsample_reshaped["sampling_cat"] = subsample_reshaped["read_depth"]

In [35]:
subsample_reshaped.head(15)

Unnamed: 0,sampleid,uid,Actinomyces,Adlercreutzia,Agathobaculum,Akkermansia,Alistipes,Anaerostipes,Anaerotruncus,Asaccharobacter,...,Turicibacter,Turicimonas,Tyzzerella,Veillonella,Victivallis,replicate,read_depth,AgeMonths,dev_stage,sampling_cat
0,C0005-3F-1A,C0005_3F_1A_1000k_1,0.0,0.0,0.0,0.0,0.073936,0.0,0.0,0.0,...,0.001235,0.0,0.0,0.0,0.0,1,1000,150.166667,older than 30 months,1000
1,C0005-3F-1A,C0005_3F_1A_1000k_2,0.0,0.0,0.0,0.0,0.079473,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,2,1000,150.166667,older than 30 months,1000
2,C0005-3F-1A,C0005_3F_1A_1000k_3,0.0,0.0,0.0,0.0,0.080825,0.0,0.0,0.0,...,0.001753,0.0,0.0,0.0,0.0,3,1000,150.166667,older than 30 months,1000
3,C0005-3F-1A,C0005_3F_1A_1000k_4,0.0,0.0,0.0,0.0,0.084085,0.0,0.0,0.0,...,0.000908,0.0,0.0,0.0,0.0,4,1000,150.166667,older than 30 months,1000
4,C0005-3F-1A,C0005_3F_1A_100k_1,0.0,0.0,0.0,0.0,0.029034,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1,100,150.166667,older than 30 months,100
5,C0005-3F-1A,C0005_3F_1A_100k_2,0.0,0.0,0.0,0.0,0.080409,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,2,100,150.166667,older than 30 months,100
6,C0005-3F-1A,C0005_3F_1A_100k_3,0.0,0.0,0.0,0.0,0.104999,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,3,100,150.166667,older than 30 months,100
7,C0005-3F-1A,C0005_3F_1A_100k_4,0.0,0.0,0.0,0.0,0.070625,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,4,100,150.166667,older than 30 months,100
8,C0005-3F-1A,C0005_3F_1A_250k_1,0.0,0.0,0.0,0.0,0.080921,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1,250,150.166667,older than 30 months,250
9,C0005-3F-1A,C0005_3F_1A_250k_2,0.0,0.0,0.0,0.0,0.087144,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,2,250,150.166667,older than 30 months,250


In [36]:
concat_df = pd.concat([original_reshaped,subsample_reshaped], sort=True).reset_index(drop = True)

In [37]:
concat_df = concat_df.fillna(0) 

In [38]:
cols_to_order = ['uid', 'sampleid', 'read_depth', 'AgeMonths', 'dev_stage', 'sampling_cat', 'replicate']
new_columns = cols_to_order + (concat_df.columns.drop(cols_to_order).tolist())
concat_df = concat_df[new_columns]

In [39]:
concat_df.head(15)

Unnamed: 0,uid,sampleid,read_depth,AgeMonths,dev_stage,sampling_cat,replicate,Acidaminococcus,Acinetobacter,Actinobacillus,...,Staphylococcus,Streptococcus,Subdoligranulum,Sutterella,Turicibacter,Turicimonas,Tyzzerella,Varibaculum,Veillonella,Victivallis
0,C0005-3F-1A-original,C0005-3F-1A,6193600.0,150.166667,older than 30 months,original depth,1,0.0,0.0,0.0,...,0.0,0.001462,0.038609,0.007346,0.000521,0.0,0.0,0.0,0.0,0.0
1,C0016-3F-1A-original,C0016-3F-1A,7626290.0,99.566667,older than 30 months,original depth,1,0.0,0.0,0.0,...,0.0,0.009097,0.06576,0.008949,0.0,0.0,0.0,0.0,0.00043,0.0
2,C0017-2F-1A-original,C0017-2F-1A,6240250.0,127.866667,older than 30 months,original depth,1,0.0,0.0,0.0,...,0.0,0.001998,0.022731,0.019241,0.0,0.0,0.0,0.0,0.000475,0.0
3,C0029-6F-1A-original,C0029-6F-1A,5641020.0,115.933333,older than 30 months,original depth,1,0.0,0.0,0.0,...,0.0,0.0,0.049503,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,C0032-9F-1A-original,C0032-9F-1A,7575450.0,102.8,older than 30 months,original depth,1,0.0,0.0,0.0,...,0.0,0.043516,0.080632,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,C0043-7F-1A-original,C0043-7F-1A,3498890.0,86.6,older than 30 months,original depth,1,0.0,0.0,0.0,...,0.0,0.041958,0.025354,0.0,0.0,0.0,0.0,0.0,0.003807,0.0
6,C0047-7F-1A-original,C0047-7F-1A,6353790.0,97.233333,older than 30 months,original depth,1,0.0,0.0,0.0,...,0.0,0.0,0.01048,0.010724,0.0,0.0,0.0,0.0,0.0,0.0
7,C0052-5F-1A-original,C0052-5F-1A,7582010.0,94.133333,older than 30 months,original depth,1,0.0,0.0,0.0,...,0.0,0.005149,0.05411,0.0,0.0,0.0,0.0,0.0,0.000419,0.0
8,C0053-6F-1A-original,C0053-6F-1A,7101550.0,72.5,older than 30 months,original depth,1,0.0,0.0,0.0,...,0.0,0.038848,0.00502,0.0,0.0,0.0,0.0,0.0,0.012215,0.0
9,C0055-3F-1A-original,C0055-3F-1A,4922720.0,114.733333,older than 30 months,original depth,1,0.0,0.0,0.0,...,0.0,0.001065,0.028274,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [40]:
concat_df.to_csv('subsampled_df.csv', index = False)