### ***Creating a User-Defined Function to Perform All Cleaning Steps in One Call:***

In [36]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import ast

***Create the `wrang` (Data Wrangling) Function With Optional Parameters (Default `None`):***

In [37]:
def wrang(
    dataframe,
    encoding=None,
    dropnanvalues=None,
    duplicatevlues=None,
    datecolumn=None,
    renamedcolumns=None,
):
    """Load a CSV file and run the standard cleaning steps in a single call.

    Every argument except ``dataframe`` is optional; a step whose argument is
    left as ``None`` either falls back to the pandas default or is skipped.
    Column arguments refer to the names as they appear in the CSV, i.e.
    *before* renaming and lower-casing.

    Parameters
    ----------
    dataframe : str, path object or file-like object
        CSV source, passed straight to ``pd.read_csv``.
    encoding : str, optional
        Text encoding passed to ``pd.read_csv``.
    dropnanvalues : label or list of labels, optional
        Columns inspected for missing values; rows with a NaN in any of them
        are dropped. With ``None`` pandas inspects every column.
    duplicatevlues : label or list of labels, optional
        Columns used to identify duplicate rows (the first occurrence is
        kept). With ``None`` pandas compares every column.
    datecolumn : label, optional
        Column converted with ``pd.to_datetime``. Skipped when ``None``.
    renamedcolumns : dict, optional
        ``{old_name: new_name}`` mapping for ``DataFrame.rename``. Skipped
        when ``None``.

    Returns
    -------
    pandas.DataFrame
        The cleaned frame with stripped, lower-cased column names. The row
        index is not reset, so it keeps gaps where rows were dropped.
    """
    # Load the data from the CSV file.
    df = pd.read_csv(dataframe, encoding=encoding)

    # `df` is local to this function, so the in-place drops below cannot
    # affect any frame owned by the caller.

    # Drop rows with NaN in the requested columns (all columns when None).
    df.dropna(subset=dropnanvalues, inplace=True)

    # Drop duplicate rows judged on the requested columns (all when None).
    df.drop_duplicates(subset=duplicatevlues, inplace=True)

    # Convert the date column to datetime. Guarded because `df[None]` would
    # raise a KeyError when no date column is requested.
    if datecolumn is not None:
        df[datecolumn] = pd.to_datetime(df[datecolumn])

    # Rename columns. Guarded because `rename(columns=None)` raises a
    # TypeError when no mapping is supplied.
    if renamedcolumns is not None:
        df.rename(columns=renamedcolumns, inplace=True)

    # Strip surrounding whitespace from column names and lower-case them.
    df.columns = df.columns.str.strip().str.lower()

    return df

In [38]:
# Preview run: the cleaned frame is not stored, only displayed as the cell's
# last expression.
wrang(
    dataframe="Full_Join_Data.csv",
    encoding="ISO-8859-1",
    dropnanvalues=["OrderId"],
    duplicatevlues="OrderId",
    datecolumn="OrderDate",
    renamedcolumns={
        "City.1": "SupplierCity",
        "Country.1": "SupplierCountry",
        "Phone.1": "SupplierPhone",
    },
)

Unnamed: 0,customerid,firstname,lastname,city,country,phone,orderid,orderdate,ordernumber,totalamount,...,unitprice,package,isdiscontinued,supplierid,companyname,contactname,suppliercity,suppliercountry,supplierphone,fax
0,1.0,Maria,Anders,Berlin,Germany,030-0074321,396.0,2013-08-25,542773.0,1086.00,...,45.60,25 - 825 g cans,True,12.0,Plutzer Lebensmittelgro?m?rkte AG,Martin Bein,Frankfurt,Germany,(069) 992755,
3,1.0,Maria,Anders,Berlin,Germany,030-0074321,445.0,2013-10-03,542822.0,878.00,...,43.90,15 - 625 g jars,False,7.0,"Pavlova, Ltd.",Ian Devling,Melbourne,Australia,(03) 444-2343,(03) 444-6588
4,1.0,Maria,Anders,Berlin,Germany,030-0074321,455.0,2013-10-13,542832.0,330.00,...,10.00,12 - 550 ml bottles,False,1.0,Exotic Liquids,Charlotte Cooper,London,UK,(171) 555-2222,
6,1.0,Maria,Anders,Berlin,Germany,030-0074321,764.0,2014-04-09,543141.0,960.00,...,13.25,24 pieces,False,27.0,Escargots Nouveaux,Marie Delamare,Montceau,France,85.57.00.07,
8,1.0,Maria,Anders,Berlin,Germany,030-0074321,588.0,2014-01-15,542965.0,851.00,...,55.00,5 kg pkg.,False,28.0,Gai pâturage,Eliane Noz,Annecy,France,38.76.98.06,38.76.98.58
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2146,91.0,Zbyszek,Piestrzeniewicz,Warszawa,Poland,(26) 642-7012,545.0,2013-12-23,542922.0,399.85,...,19.00,24 - 12 oz bottles,False,1.0,Exotic Liquids,Charlotte Cooper,London,UK,(171) 555-2222,
2149,91.0,Zbyszek,Piestrzeniewicz,Warszawa,Poland,(26) 642-7012,623.0,2014-02-04,543000.0,160.00,...,18.00,24 - 12 oz bottles,False,16.0,Bigfoot Breweries,Cheryl Saylor,Bend,USA,(503) 555-9931,
2151,91.0,Zbyszek,Piestrzeniewicz,Warszawa,Poland,(26) 642-7012,659.0,2014-02-25,543036.0,427.50,...,28.50,24 - 500 ml bottles,False,29.0,Forêts d'érables,Chantal Goulet,Ste-Hyacinthe,Canada,(514) 555-2955,(514) 555-2921
2152,91.0,Zbyszek,Piestrzeniewicz,Warszawa,Poland,(26) 642-7012,751.0,2014-04-03,543128.0,686.00,...,4.50,12 - 355 ml cans,True,10.0,Refrescos Americanas LTDA,Carlos Diaz,Sao Paulo,Brazil,(11) 555 4640,


***Create New Dataframe (Orders) By Using Wrang User-Defined Function:***

In [39]:
# Build the `orders` frame used by the following cells.
orders = wrang(
    dataframe="Full_Join_Data.csv",
    encoding="latin-1",
    dropnanvalues=["OrderId"],
    duplicatevlues="OrderId",
    datecolumn="OrderDate",
    renamedcolumns={
        "City.1": "SupplierCity",
        "Country.1": "SupplierCountry",
        "Phone.1": "SupplierPhone",
    },
)

***Note That:***
**- If `orders.info()` raises `AttributeError: 'NoneType' object has no attribute 'info'`, the variable `orders` is `None`, which means that the function `wrang` did not return a value explicitly. In Python, a function without a `return` statement implicitly returns `None`.**

**- To fix this issue, make sure that `wrang` returns the `DataFrame` object it works on by including `return df` at the end of the function. The processed DataFrame is then returned and assigned to the `orders` variable, which resolves the `AttributeError`.**

In [40]:
# Summarise the cleaned frame: index range, column names, non-null counts and dtypes.
orders.info()

<class 'pandas.core.frame.DataFrame'>
Index: 830 entries, 0 to 2156
Data columns (total 22 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   customerid       830 non-null    float64       
 1   firstname        830 non-null    object        
 2   lastname         830 non-null    object        
 3   city             830 non-null    object        
 4   country          830 non-null    object        
 5   phone            830 non-null    object        
 6   orderid          830 non-null    float64       
 7   orderdate        830 non-null    datetime64[ns]
 8   ordernumber      830 non-null    float64       
 9   totalamount      830 non-null    float64       
 10  productid        830 non-null    float64       
 11  productname      830 non-null    object        
 12  unitprice        830 non-null    float64       
 13  package          830 non-null    object        
 14  isdiscontinued   830 non-null    object       

In [59]:
# Columns to convert to integers (they show up as float64 in `orders.info()`).
columns_edit = ["customerid", "orderid", "ordernumber", "productid", "supplierid"]

# Cast only the listed columns that actually exist in `orders`; names that are
# absent from the frame are silently skipped.
for column in [name for name in orders.columns if name in columns_edit]:
    orders[column] = orders[column].astype(int)


In [61]:
# Keep only the customer and order columns, in this order, then confirm the
# resulting schema.
orders = orders[
    [
        "customerid",
        "firstname",
        "lastname",
        "city",
        "country",
        "orderid",
        "ordernumber",
        "orderdate",
        "totalamount",
    ]
]
orders.info()

<class 'pandas.core.frame.DataFrame'>
Index: 830 entries, 0 to 2156
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   customerid   830 non-null    int64         
 1   firstname    830 non-null    object        
 2   lastname     830 non-null    object        
 3   city         830 non-null    object        
 4   country      830 non-null    object        
 5   orderid      830 non-null    int64         
 6   ordernumber  830 non-null    int64         
 7   orderdate    830 non-null    datetime64[ns]
 8   totalamount  830 non-null    float64       
dtypes: datetime64[ns](1), float64(1), int64(3), object(4)
memory usage: 64.8+ KB
