TheBridgeMachineLearningPythonLibrary
diff --git a/‎dist/ds11mltoolkit-1.4.tar.gz‎
-19.4 KB b/‎dist/ds11mltoolkit-1.4.tar.gz‎
-19.4 KB
diff --git a/‎ds11mltoolkit.egg-info/PKG-INFO‎
Lines changed: 0 additions & 18 deletions b/‎ds11mltoolkit.egg-info/PKG-INFO‎
Lines changed: 0 additions & 18 deletions
diff --git a/‎ds11mltoolkit.egg-info/SOURCES.txt‎
Lines changed: 0 additions & 14 deletions b/‎ds11mltoolkit.egg-info/SOURCES.txt‎
Lines changed: 0 additions & 14 deletions
diff --git a/‎ds11mltoolkit.egg-info/dependency_links.txt‎
Lines changed: 0 additions & 1 deletion b/‎ds11mltoolkit.egg-info/dependency_links.txt‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎ds11mltoolkit.egg-info/requires.txt‎
Lines changed: 0 additions & 16 deletions b/‎ds11mltoolkit.egg-info/requires.txt‎
Lines changed: 0 additions & 16 deletions
diff --git a/‎ds11mltoolkit.egg-info/top_level.txt‎
Lines changed: 0 additions & 1 deletion b/‎ds11mltoolkit.egg-info/top_level.txt‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎ds11mltoolkit/data_analysis.py‎
Lines changed: 18 additions & 1 deletion b/‎ds11mltoolkit/data_analysis.py‎
Lines changed: 18 additions & 1 deletion
diff --git a/‎ds11mltoolkit/data_processing.py‎
Lines changed: 37 additions & 26 deletions b/‎ds11mltoolkit/data_processing.py‎
Lines changed: 37 additions & 26 deletions
@@ -1,8 +1,8 @@
 import pandas as pd
-import pickle
 import zipfile
 import scipy.stats as stats
 import pandas as pd
+import webbrowser
 
 def read_url(url):
 
@@ -78,3 +78,20 @@ def chi_squared_test(df, feature, target):
 
     # Return the chi-squared value and p-value obtained in the test.
     return chi2, p
+
+def solver(valor=None):
+    """
+    Open a web page in the default browser; which page depends on valor.
+
+    Parameters
+    ----------
+    valor : any, optional
+        None (default) opens the chat page; any other value opens the rant page.
+
+    Returns
+    ---------
+    None. The only effect is the webbrowser.open call.
+    """
+    light = "https://chat.openai.com/chat"
+    other_side = "https://www.reddit.com/r/rant/comments/oo0uck/fuck_fuck_fuck/"
+    webbrowser.open(light if valor is None else other_side)
@@ -15,31 +15,6 @@
     DirectoryIterator
 )
 
-def extract_date(df, date_column_name):
-    '''
-    Add 'Year' and 'Month' columns taken from a date column of the dataframe.
-
-    Parameters
-    ----------
-    df : dataframe
-        Dataset holding the date column; it is modified in place.
-    date_column_name : str
-        Name of the column read through the .dt accessor.
-
-    Return
-    ----------
-        df : the same dataframe with the 'Year' and 'Month' columns added
-
-    NOTE(review): the regex below runs on the column *name*, not on its values,
-    and its result is never used -- confirm whether a conversion was intended.
-    '''
-    match = re.search(r'\d{4}.\d{2}.\d{2}', date_column_name)
-    date = datetime.strptime(match.group(), '%Y-%m-%d').date()
-
-    df['Year'] = df[date_column_name].dt.year
-    df['Month'] = df[date_column_name].dt.month
-    
-    return df
 
 def list_categorical_columns(df):
     '''
@@ -844,4 +819,40 @@ class subdirectories (default: False).
                 next(generator)
                 images_generated += batch_size
 
-        return generator
+        return generator
+
+
+def Nantreatment(data, replace=True, replace_value='None', replace_numeric_with_mean=False):
+    '''
+    Replace or drop the NaN values of a DataFrame without modifying the input.
+
+    Parameters:
+    -----------
+    data: Pandas DataFrame
+        Data whose missing values are going to be treated.
+    replace: bool
+        If True, NaN's in numeric columns are replaced by 0 (or by the column
+        mean) and NaN's in object columns by replace_value. If False, every
+        row containing a NaN is dropped.
+    replace_value: object
+        Value written into the NaN's of object columns (default: the string 'None').
+    replace_numeric_with_mean: bool
+        If True, numeric NaN's are filled with the column mean instead of 0.
+
+    Returns:
+    -----------
+    Pandas DataFrame
+        A new DataFrame with the NaN's replaced or dropped and the index reset.
+    '''
+    if not replace:
+        return data.dropna().reset_index(drop=True)
+
+    numeric = data.select_dtypes(include='number')
+    # numeric-only mean: DataFrame.mean() raises on object columns in pandas >= 2.0
+    if replace_numeric_with_mean:
+        fill_values = numeric.mean().to_dict()
+    else:
+        fill_values = dict.fromkeys(numeric.columns, 0)
+    fill_values.update(dict.fromkeys(data.select_dtypes(include='object').columns, replace_value))
+    # fillna with a mapping returns a new frame, so the caller's data is never mutated
+    return data.fillna(value=fill_values).reset_index(drop=True)