Skip to content

Commit 91e698d

Browse files
committed
update with dev files
1 parent 03b8de3 commit 91e698d

File tree

11 files changed

+177
-137
lines changed

11 files changed

+177
-137
lines changed

dist/ds11mltoolkit-1.4.tar.gz

-19.4 KB
Binary file not shown.

ds11mltoolkit.egg-info/PKG-INFO

Lines changed: 0 additions & 18 deletions
This file was deleted.

ds11mltoolkit.egg-info/SOURCES.txt

Lines changed: 0 additions & 14 deletions
This file was deleted.

ds11mltoolkit.egg-info/dependency_links.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

ds11mltoolkit.egg-info/requires.txt

Lines changed: 0 additions & 16 deletions
This file was deleted.

ds11mltoolkit.egg-info/top_level.txt

Lines changed: 0 additions & 1 deletion
This file was deleted.

ds11mltoolkit/data_analysis.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import pandas as pd
2-
import pickle
32
import zipfile
43
import scipy.stats as stats
54
import pandas as pd
5+
import webbrowser
66

77
def read_url(url):
88

@@ -78,3 +78,20 @@ def chi_squared_test(df, feature, target):
7878

7979
# Return the chi-squared value and p-value obtained in the test.
8080
return chi2, p
81+
82+
def solver(valor=None):
    """
    Open a web page in the default browser; the page depends on ``valor``.

    Parameters
    ----------
    valor : any, optional
        When left as None, the ChatGPT chat page is opened.
        Any other value opens a Reddit "rant" thread instead.

    Returns
    ---------
    None. The only effect is the call to ``webbrowser.open``.
    """
    # Pick the destination first so there is a single browser call below.
    if valor is not None:
        target = "https://www.reddit.com/r/rant/comments/oo0uck/fuck_fuck_fuck/"
    else:
        target = "https://chat.openai.com/chat"

    webbrowser.open(target)

ds11mltoolkit/data_processing.py

Lines changed: 37 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -15,31 +15,6 @@
1515
DirectoryIterator
1616
)
1717

18-
def extract_date(df, date_column_name):
    '''
    Parse the dates stored in a column and add 'Year' and 'Month' columns.

    The values of ``date_column_name`` are searched for a
    ``YYYY<sep>MM<sep>DD`` pattern (any single non-digit separator). The
    column is replaced by the parsed dates and two new columns, 'Year' and
    'Month', are derived from it. Values without a valid date become NaT
    (and NaN in 'Year' / 'Month'). A column that already has a datetime
    dtype is used as it is.

    Parameters
    ----------
    df : dataframe
        Dataset that contains the column to parse. It is modified in place.
    date_column_name : str
        Name of the column holding the dates (as strings or datetimes).

    Return
    ----------
    df : the same DataFrame, with the date column converted to datetime and
        the 'Year' and 'Month' columns added.
    '''
    # Local import keeps this block self-contained regardless of the
    # module-level imports.
    import pandas as pd

    column = df[date_column_name]

    if pd.api.types.is_datetime64_any_dtype(column):
        dates = column
    else:
        # Search the column VALUES (not the column name) for the date pattern.
        parts = column.astype(str).str.extract(r'(\d{4})\D(\d{2})\D(\d{2})')
        # Rebuild a canonical YYYY-MM-DD string so any separator is accepted;
        # rows with no match stay missing and are coerced to NaT.
        canonical = parts[0] + '-' + parts[1] + '-' + parts[2]
        dates = pd.to_datetime(canonical, format='%Y-%m-%d', errors='coerce')
        df[date_column_name] = dates

    df['Year'] = dates.dt.year
    df['Month'] = dates.dt.month

    return df
4318

4419
def list_categorical_columns(df):
4520
'''
@@ -844,4 +819,40 @@ class subdirectories (default: False).
844819
next(generator)
845820
images_generated += batch_size
846821

847-
return generator
822+
return generator
823+
824+
825+
def Nantreatment(data, replace=True, replace_value='None', replace_numeric_with_mean=False):
    '''
    Function:
    -----------
    Handle the NaN values of a DataFrame, either by replacing them or by
    dropping the rows that contain them.

    Parameters:
    -----------
    data: Pandas DataFrame
        Data that the function is going to analyze. It is never modified.
    replace: bool
        If True, NaN values are replaced: numeric columns get 0 (or the
        column mean, see ``replace_numeric_with_mean``) and object columns
        get ``replace_value``. If False, every row containing a NaN is
        dropped.
    replace_value: object
        Value written into the NaN cells of object columns when ``replace``
        is True (default: the string 'None').
    replace_numeric_with_mean: bool
        If True, NaN values in numeric columns are replaced by the column
        mean instead of 0.

    Returns:
    -----------
    Pandas DataFrame
        A copy of the input DataFrame with NaN values replaced or dropped
        and the index reset.
    '''
    # Work on a copy so the column assignments below never reach the
    # caller's DataFrame.
    data = data.copy()

    if not replace:
        return data.dropna().reset_index(drop=True)

    numeric_columns = data.select_dtypes(include=[np.number]).columns
    object_columns = data.select_dtypes(include=[object]).columns

    for name in numeric_columns:
        # The mean is computed per numeric column only; DataFrame.mean()
        # over the whole frame fails on object columns in recent pandas.
        filler = data[name].mean() if replace_numeric_with_mean else 0
        data[name] = data[name].fillna(value=filler)

    # Object columns are filled in both numeric modes, as documented.
    for name in object_columns:
        data[name] = data[name].fillna(replace_value)

    return data.reset_index(drop=True)

0 commit comments

Comments
 (0)