15
15
DirectoryIterator
16
16
)
17
17
18
def extract_date(df, date_column_name):
    '''
    Convert a text column containing dates to datetime and add Year/Month columns.

    Each value of the column is searched for the first ``YYYY?MM?DD`` pattern
    (four digits, any single separator, two digits, any single separator, two
    digits). The match is parsed as a date and written back to the column.
    A column that already has a datetime dtype is used as it is.

    Parameters
    ----------
    df : dataframe
        Dataset holding the column to convert. It is modified in place.
    date_column_name : str
        Name of the column whose values contain the date as text.

    Return
    ----------
    df : the same DataFrame, with ``date_column_name`` converted to datetime
        and the new columns ``Year`` and ``Month``. Rows where no date pattern
        is found get NaT / NaN.

    Raises
    ----------
    KeyError
        If ``date_column_name`` is not a column of ``df``.
    ValueError
        If a matched pattern is not a valid calendar date (e.g. month 13).
    '''
    # Local import keeps the function self-contained regardless of how the
    # module imports pandas at the top of the file.
    import pandas as pd

    column = df[date_column_name]

    if not pd.api.types.is_datetime64_any_dtype(column):
        # The pattern must be applied to the column VALUES; searching the
        # column name (as before) only worked if the name itself held a date.
        matched = column.astype(str).str.extract(
            r'(\d{4}.\d{2}.\d{2})', expand=False
        )
        # The pattern accepts any separator, so normalise it to '-' before
        # parsing with the fixed '%Y-%m-%d' format.
        normalised = matched.str.replace(r'\D', '-', regex=True)
        column = pd.to_datetime(normalised, format='%Y-%m-%d')
        df[date_column_name] = column

    df['Year'] = column.dt.year
    df['Month'] = column.dt.month

    return df
43
18
44
19
def list_categorical_columns (df ):
45
20
'''
@@ -844,4 +819,40 @@ class subdirectories (default: False).
844
819
next (generator )
845
820
images_generated += batch_size
846
821
847
- return generator
822
+ return generator
823
+
824
+
825
def Nantreatment(data, replace=True, replace_value='None', replace_numeric_with_mean=False):
    '''
    Function:
    -----------
    Handle the NaN values of a DataFrame, either by replacing them or by
    dropping the rows that contain them. The input DataFrame is never modified.

    Parameters:
    -----------
    data: Pandas DataFrame
        Data that the function is going to analyze.
    replace: bool
        If True, NaN values are replaced: numeric columns get 0 (or the column
        mean, see ``replace_numeric_with_mean``) and object columns get
        ``replace_value``. If False, every row containing a NaN is dropped.
    replace_value: object
        Value written into the NaN cells of object columns (default: the
        string 'None'). Only used when ``replace`` is True.
    replace_numeric_with_mean: bool
        If True, NaN in numeric columns is replaced by the column mean instead
        of 0. Only used when ``replace`` is True.

    Returns:
    -----------
    Pandas DataFrame
        A copy of the input DataFrame with NaN values replaced or dropped and
        the index reset to 0..n-1.
    '''
    if not replace:
        return data.dropna().reset_index(drop=True)

    numeric_columns = data.select_dtypes(include=[np.number]).columns
    object_columns = data.select_dtypes(include=[object]).columns

    if replace_numeric_with_mean:
        # Means are taken over the numeric columns only: DataFrame.mean() on a
        # frame that still contains object columns raises on pandas >= 2.0.
        fill_values = data[numeric_columns].mean().to_dict()
    else:
        fill_values = {name: 0 for name in numeric_columns}

    # Object columns are filled in both modes, as the documentation promises.
    fill_values.update({name: replace_value for name in object_columns})

    # fillna returns a new DataFrame, so the caller's data is left untouched;
    # columns missing from the mapping are not filled.
    result = data.fillna(value=fill_values) if fill_values else data.copy()

    return result.reset_index(drop=True)
0 commit comments