-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
146 lines (117 loc) · 4.2 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import seaborn as sns
import os
import streamlit as st
# Data loading and exploratory data analysis
import pandas as pd
# Plotting
import matplotlib.pyplot as plt
import matplotlib
# Select the non-interactive Agg backend: figures are rendered off-screen
# and handed to Streamlit, so no GUI window toolkit is needed.
matplotlib.use('Agg')
def _show_figure(fig):
    """Render a Matplotlib figure in the app, then close it.

    The figure is passed to ``st.pyplot`` explicitly instead of relying on
    pyplot's global "current figure". It is closed afterwards so figures do
    not pile up in pyplot's registry each time the script is re-run.
    """
    try:
        st.pyplot(fig)
    finally:
        plt.close(fig)


def main():
    """Common ML Dataset Explorer.

    Builds the Streamlit page: lets the user pick a file from the
    ``./datasets`` folder, loads it with ``pandas.read_csv`` and offers
    widgets to inspect it (head, columns, shape, dtypes, statistics) and to
    plot it (correlation heatmap, pie chart, value counts, custom plots).

    The last column of the dataset is treated as the target/class column
    for the value-count and pie views.
    """
    st.title("Dataset Explorer")
    st.subheader("A Visualizer for your Datasets")
    # Rendered up front so the credit stays visible even if loading the
    # dataset stops the page early.
    st.sidebar.text("Built by Jay Vinay Namgiri")

    def file_selector(folder_path='./datasets'):
        """Let the user choose a file from ``folder_path``.

        Returns the path of the chosen file, or ``None`` when the folder
        cannot be listed or contains no regular files.
        """
        try:
            entries = os.listdir(folder_path)
        except OSError:
            # Folder missing, not a directory, or not readable.
            return None
        # Skip sub-directories (they cannot be read as CSV) and sort,
        # because os.listdir returns entries in arbitrary order.
        filenames = sorted(
            name for name in entries
            if os.path.isfile(os.path.join(folder_path, name))
        )
        if not filenames:
            return None
        selected_filename = st.selectbox("Select A file", filenames)
        if selected_filename is None:
            return None
        return os.path.join(folder_path, selected_filename)

    filename = file_selector()
    if filename is None:
        st.error("No dataset files were found in the datasets folder.")
        return
    st.info("You Selected {}".format(filename))

    # Read Data
    try:
        df = pd.read_csv(filename)
    except (OSError, ValueError) as exc:
        # pandas' EmptyDataError and ParserError, as well as
        # UnicodeDecodeError, are all ValueError subclasses.
        st.error("Could not read {} as CSV: {}".format(filename, exc))
        return

    # Show Dataset
    # Show Rows
    if st.button("Row Names"):
        st.write(df.head())

    # Show Columns
    if st.button("Column Names"):
        st.write(df.columns)

    # Show Shape
    if st.checkbox("Shape of Dataset"):
        data_dim = st.radio("Show Dimension By ", ("Rows", "Columns"))
        if data_dim == 'Rows':
            st.text("Number of Rows")
            st.write(df.shape[0])
        elif data_dim == 'Columns':
            st.text("Number of Columns")
            st.write(df.shape[1])
        else:
            # Fallback for the case where neither option is selected.
            st.write(df.shape)

    # Select Columns
    if st.checkbox("Select Columns To Show"):
        all_columns = df.columns.tolist()
        selected_columns = st.multiselect("Select", all_columns)
        new_df = df[selected_columns]
        st.dataframe(new_df)

    # Show Values
    if st.button("Value Counts"):
        st.text("Value Counts By Target/Class")
        st.write(df.iloc[:, -1].value_counts())

    # Show Datatypes
    if st.button("Data Types"):
        st.write(df.dtypes)

    # Show Statistics
    if st.button("Know the Statistical elements of your Data"):
        st.write(df.describe())

    # Show Summary
    if st.checkbox("Summary"):
        st.write(df.describe().T)

    ## Plot and Visualization
    st.subheader("Data Visualization")

    # Correlation
    # Seaborn Plot
    if st.checkbox("Correlation Plot[Seaborn]"):
        # Correlation is only defined for numeric columns; restricting to
        # them avoids an error on datasets that contain text columns.
        numeric_df = df.select_dtypes(include="number")
        if numeric_df.shape[1] == 0:
            st.warning("The dataset has no numeric columns to correlate.")
        else:
            fig, ax = plt.subplots()
            sns.heatmap(numeric_df.corr(), annot=True, ax=ax)
            _show_figure(fig)

    # Pie Chart
    if st.checkbox("Pie Plot"):
        if st.button("Generate Pie Plot"):
            st.success("Generating A Pie Plot")
            fig, ax = plt.subplots()
            df.iloc[:, -1].value_counts().plot.pie(autopct="%1.1f%%", ax=ax)
            _show_figure(fig)

    # Count Plot
    if st.checkbox("Plot of Value Counts"):
        st.text("Value Counts By Target")
        all_columns_names = df.columns.tolist()
        primary_col = st.selectbox(
            "Primary Columm to GroupBy", all_columns_names)
        selected_columns_names = st.multiselect(
            "Select Columns", all_columns_names)
        if st.button("Plot"):
            st.text("Generate Plot")
            if selected_columns_names:
                vc_plot = df.groupby(primary_col)[
                    selected_columns_names].count()
            else:
                # Nothing selected: fall back to counts of the last column.
                vc_plot = df.iloc[:, -1].value_counts()
            fig, ax = plt.subplots()
            vc_plot.plot(kind="bar", ax=ax)
            _show_figure(fig)

    # Customizable Plot
    st.subheader("Customizable Plot")
    all_columns_names = df.columns.tolist()
    type_of_plot = st.selectbox("Select Type of Plot", [
        "area", "bar", "line", "hist", "box", "kde"])
    selected_columns_names = st.multiselect(
        "Select Columns To Plot", all_columns_names)

    if st.button("Generate Plot"):
        if not selected_columns_names:
            # Plotting an empty column selection would raise for the
            # Matplotlib-based plot types, so ask for input instead.
            st.warning("Select at least one column to plot.")
        else:
            st.success("Generating Customizable Plot of {} for {}".format(
                type_of_plot, selected_columns_names))
            cust_data = df[selected_columns_names]

            # Plot By Streamlit
            if type_of_plot == 'area':
                st.area_chart(cust_data)
            elif type_of_plot == 'bar':
                st.bar_chart(cust_data)
            elif type_of_plot == 'line':
                st.line_chart(cust_data)
            # Custom Plot (drawn by pandas/Matplotlib)
            elif type_of_plot:
                fig, ax = plt.subplots()
                try:
                    cust_data.plot(kind=type_of_plot, ax=ax)
                except TypeError as exc:
                    # pandas raises TypeError when the selection holds no
                    # numeric data to plot.
                    plt.close(fig)
                    st.error("Cannot draw a {} plot for {}: {}".format(
                        type_of_plot, selected_columns_names, exc))
                else:
                    _show_figure(fig)
# Build the page only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()