In [1]:
import numpy as np
import pandas as pd

# load data from file
# Passing `names=` makes pandas treat the first line of the file as data,
# so the column labels come from this list rather than from the CSV.
column_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'iris_class']
table = pd.read_csv('iris_data.csv', names=column_names)

# create DataFrame for each class
# A boolean mask (rather than groupby) yields an empty frame, not an error,
# when a class label is absent from the file.
setosa_data = table.loc[table['iris_class'] == 'Iris-setosa']
versicolor_data = table.loc[table['iris_class'] == 'Iris-versicolor']
virginica_data = table.loc[table['iris_class'] == 'Iris-virginica']

# create summary DataFrame
# One 'Parameters' label per report row: a section title, six descriptive
# statistics, a percentage-section title, then one label per value range.
stat_labels = ['N', 'MEAN', 'MIN', 'MEDIAN', 'MAX', 'STANDARD DEVIATION']
length_labels = (['SEPAL LENGTH [CM]'] + stat_labels
                 + ['SEPAL LENGTH (%)', '< 5', '>=5 AND <6', '>=6 AND <7', '>= 7'])
width_labels = (['SEPAL WIDTH [CM]'] + stat_labels
                + ['SEPAL WIDTH (%)', '< 3', '>=3 AND <3.5', '>=3.5 AND <4'])
iris_flowers = pd.DataFrame({'Parameters': length_labels + width_labels})

table

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,iris_class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Iris-virginica
146,6.3,2.5,5.0,1.9,Iris-virginica
147,6.5,3.0,5.2,2.0,Iris-virginica
148,6.2,3.4,5.4,2.3,Iris-virginica


In [2]:
def summary_cm (df):
    """Build the descriptive-statistics rows for one measurement column.

    Parameters
    ----------
    df : pandas.Series
        Numeric values to summarise (a Series, despite the parameter name).

    Returns
    -------
    list
        ``['', N, mean, min, median, max, std]``. The leading empty string
        is a placeholder for the section-title row. ``N`` is the row count
        (missing values included), ``mean``/``median``/``std`` are strings
        formatted to two decimals, and ``min``/``max`` are the raw values.

    Raises
    ------
    ValueError
        If ``df`` holds no non-missing values (``min``/``max`` of nothing).
    """
    numb = df.shape[0]
    # The built-in min()/max() compare element by element, so a NaN makes the
    # result depend on row order. Drop missing values first so that min/max
    # agree with mean/median/std, which already skip NaN.
    present = df.dropna()
    mean = "{:.2f}".format(df.mean())
    min_val = min(present)
    median = "{:.2f}".format(df.median())
    max_val = max(present)
    # ddof=0 is the population standard deviation, i.e. what np.std computes.
    st_dev = "{:.2f}".format(df.std(ddof=0))
    return(['', numb, mean, min_val, median, max_val, st_dev])

# % for length
def summary_lehgth (df,a,b,c):
    """Count the values falling in four ranges split at ``a``, ``b`` and ``c``.

    Parameters
    ----------
    df : pandas.Series
        Numeric values to bin (a Series, despite the parameter name).
    a, b, c : number
        Ascending cut points. The ranges are ``< a``, ``>= a and < b``,
        ``>= b and < c`` and ``>= c``.

    Returns
    -------
    list of str
        ``['', x1, x2, x3, x4]``. The leading empty string is a placeholder
        for the section-title row; each ``x`` is ``"count(percent)"`` with
        the percentage of all rows shown to one decimal. An empty input
        yields ``"0(0.0)"`` for every range.
    """
    numb = df.shape[0]

    def _cell(in_range):
        # Number of True entries in the boolean mask = rows inside the range.
        count = int(in_range.sum())
        # An empty input has no meaningful share; report 0.0 rather than
        # dividing by zero.
        share = count/numb*100 if numb else 0.0
        return "{}({:.1f})".format(count, share)

    x1 = _cell(df < a)
    x2 = _cell((df >= a) & (df < b))
    x3 = _cell((df >= b) & (df < c))
    x4 = _cell(df >= c)
    return(['', x1, x2, x3, x4])

# % for width
def summary_width (df,a,b,c):
    """Count the values falling in three ranges bounded by ``a``, ``b`` and ``c``.

    Parameters
    ----------
    df : pandas.Series
        Numeric values to bin (a Series, despite the parameter name).
    a, b, c : number
        Ascending cut points. The ranges are ``< a``, ``>= a and < b`` and
        ``>= b and < c``.

    Returns
    -------
    list of str
        ``['', x1, x2, x3]``. The leading empty string is a placeholder for
        the section-title row; each ``x`` is ``"count(percent)"`` with the
        percentage of all rows shown to one decimal. An empty input yields
        ``"0(0.0)"`` for every range.

    Notes
    -----
    Values ``>= c`` are not reported in any range, so the percentages need
    not add up to 100.
    NOTE(review): confirm whether a fourth ``>= c`` row is wanted; adding it
    would also require a matching label in the summary table.
    """
    numb = df.shape[0]

    def _cell(in_range):
        # Number of True entries in the boolean mask = rows inside the range.
        count = int(in_range.sum())
        # An empty input has no meaningful share; report 0.0 rather than
        # dividing by zero.
        share = count/numb*100 if numb else 0.0
        return "{}({:.1f})".format(count, share)

    x1 = _cell(df < a)
    x2 = _cell((df >= a) & (df < b))
    x3 = _cell((df >= b) & (df < c))
    return(['', x1, x2, x3])

In [3]:
# add columns to the summary DataFrame
# Each column stacks four pieces in the row order of 'Parameters':
# sepal-length statistics, sepal-length ranges (cut at 5, 6, 7),
# sepal-width statistics, sepal-width ranges (cut at 3, 3.5, 4).
report_columns = [
    ("Iris-setosa", setosa_data),
    ("Iris-versicolor", versicolor_data),
    ("Iris-virginica", virginica_data),
    ("Total", table),
]
for column_name, flowers in report_columns:
    lengths = flowers.sepal_length
    widths = flowers.sepal_width
    iris_flowers[column_name] = (
        summary_cm(lengths)
        + summary_lehgth(lengths, 5, 6, 7)
        + summary_cm(widths)
        + summary_width(widths, 3, 3.5, 4)
    )

iris_flowers

Unnamed: 0,Parameters,Iris-setosa,Iris-versicolor,Iris-virginica,Total
0,SEPAL LENGTH [CM],,,,
1,N,50,50,50,150
2,MEAN,5.01,5.94,6.59,5.84
3,MIN,4.3,4.9,4.9,4.3
4,MEDIAN,5.00,5.90,6.50,5.80
5,MAX,5.8,7,7.9,7.9
6,STANDARD DEVIATION,0.35,0.51,0.63,0.83
7,SEPAL LENGTH (%),,,,
8,< 5,20(40.0),1(2.0),1(2.0),22(14.7)
9,>=5 AND <6,30(60.0),25(50.0),6(12.0),61(40.7)


In [4]:
# write DataFrame to the file
# Fixed-width plain-text report: a 40-character label column followed by four
# 20-character value columns, framed by 110-character dashed rules.
section_title_rows = {0, 7, 12, 19}  # rows of iris_flowers holding section titles, not values
class_columns = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica', 'Total']
header_fmt = "{0:<40}{1:<20}{2:<20}{3:<20}{4:<20}\n"
rule = "{0:<110}\n".format('-'*110)

# The context manager closes the file even if one of the writes raises.
with open('iris-flowers.txt', 'w') as f:
    f.write("{0:^120}\n".format('Iris Flower Summary'))
    # The extra '\n' is part of the centred text, so this emits the subtitle
    # followed by a whitespace-only line.
    f.write("{0:^120}\n".format('All Flowers' + '\n'))
    f.write(rule)
    f.write(header_fmt.format('','Iris Setosa', 'Iris Versicolor','Iris Virginica','Total'))
    # Row 1 of the summary holds N for every column.
    f.write(header_fmt.format('', *['N = '+str(iris_flowers.loc[1, name]) for name in class_columns]))
    f.write(rule)
    for index, col in iris_flowers.iterrows():
        if index in section_title_rows:
            # Section titles get a blank line above and no value columns.
            f.write('\n')
            f.write("{0:<20}\n".format(col['Parameters']))
        else:
            # Data rows are indented three spaces under their section title.
            f.write("{0:<3}{1:<37}{2:<20}{3:<20}{4:<20}{5:<20}\n".format('', col['Parameters'], col['Iris-setosa'], col['Iris-versicolor'], col['Iris-virginica'], col['Total']))
    f.write(rule)