**Data Import for Analysis**

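The raw CRM sales pipeline export (`CRM-and-Sales-Pipelines.csv`) is cleaned in the cell below and saved as `CRM-and-Sales-Pipelines_cleaned.csv`.  
The cleaned data is then loaded into a DataFrame (`df_cleaned`), which will be used for all subsequent exploration and insights.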

*Rows: 3,000 | Columns: 17*

In [52]:
# --- Clean CRM-and-Sales-Pipelines.csv ---
import pandas as pd
import numpy as np
import os

# Define file paths
input_path = 'CRM-and-Sales-Pipelines.csv'
output_path = 'CRM-and-Sales-Pipelines_cleaned.csv'


def _strip_if_str(value):
    """Return `value` without surrounding whitespace if it is a str, else unchanged."""
    return value.strip() if isinstance(value, str) else value


# Read the CSV file
# Note: encoding and delimiter can be adjusted if needed
print('Reading the original CSV file...')
df = pd.read_csv(input_path)

# 1. Remove duplicate rows
# First pass: drops rows that are exact duplicates in the raw file.
print('Removing duplicate rows...')
df = df.drop_duplicates()

# 2. Strip whitespace from headers and string columns
print('Stripping whitespace from headers and string columns...')
df.columns = df.columns.str.strip()
str_cols = df.select_dtypes(include='object').columns
# Strip element by element rather than with `.str.strip()`: the `.str`
# accessor replaces every non-string entry of an object column with NaN,
# which would silently discard values in mixed-type columns.
df[str_cols] = df[str_cols].apply(lambda col: col.map(_strip_if_str))

# 3. Standardize column names (remove spaces, fix typos)
print('Standardizing column names...')
col_rename = {
    'Lattitude': 'Latitude',
    'Deal Value, $': 'Deal_Value_USD',
    'Probability, %': 'Probability_Pct',
}
df = df.rename(columns=col_rename)
df.columns = df.columns.str.replace(' ', '_')

# 4. Handle missing values: map empty / 'NA' / 'N/A' strings to NaN,
#    then drop rows in which every value is NaN
print('Handling missing values...')
df = df.replace({'': np.nan, 'NA': np.nan, 'N/A': np.nan})
df = df.dropna(how='all')

# 5. Ensure numeric columns are properly typed
# errors='coerce' turns values that cannot be parsed as numbers into NaN.
print('Converting columns to appropriate dtypes...')
numeric_cols = ['Latitude', 'Longitude', 'Deal_Value_USD', 'Probability_Pct']
for col in numeric_cols:
    if col in df.columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')

# Second de-duplication pass: rows that differed only by surrounding
# whitespace, by how a missing value was spelled, or by numeric formatting
# are identical after the normalization above and would otherwise remain
# in the cleaned file as duplicates.
df = df.drop_duplicates()

# Save the cleaned file (index=False: the row index is not written as a column)
print('Saving cleaned file as', output_path)
df.to_csv(output_path, index=False)

print('File has been cleaned.')
print(f'New file has been created: {output_path}')


Reading the original CSV file...
Removing duplicate rows...
Stripping whitespace from headers and string columns...
Standardizing column names...
Handling missing values...
Converting columns to appropriate dtypes...
Saving cleaned file as CRM-and-Sales-Pipelines_cleaned.csv
File has been cleaned.
New file has been created: CRM-and-Sales-Pipelines_cleaned.csv


**Load Cleaned Data**

The cleaned CRM sales pipeline data is loaded for analysis and previewed.  
This DataFrame will be used for all further exploration.

In [67]:
import pandas as pd

# File to load for the analysis.
cleaned_path = 'CRM-and-Sales-Pipelines_cleaned.csv'

# Read the file into the DataFrame used by the rest of the notebook.
df_cleaned = pd.read_csv(filepath_or_buffer=cleaned_path)

# Report the source file, the DataFrame's purpose and its (rows, columns) shape.
for _message_parts in (
    ('Loaded cleaned data from', cleaned_path),
    ('This DataFrame will be used for future analysis.',),
    ('Shape:', df_cleaned.shape),
):
    print(*_message_parts)

# Last expression of the cell: preview the first five rows.
df_cleaned.head(n=5)

Loaded cleaned data from CRM-and-Sales-Pipelines_cleaned.csv
This DataFrame will be used for future analysis.
Shape: (3000, 17)


Unnamed: 0,Organization,Country,Latitude,Longitude,Industry,Organization_size,Owner,Lead_acquisition_date,Product,Status,Status_sequence,Stage,Stage_sequence,Deal_Value_USD,Probability_Pct,Expected_close_date,Actual_close_date
0,Thoughtblab,Netherlands,52.370216,4.895168,Banking and Finance,Small (11-200),John Smith,4/20/2024,SAAS,Churned Customer,7,,,833,90,8/7/2024,6/27/2024
1,Jaxnation,Spain,40.416775,-3.70379,Energy & Utilities,Small (11-200),Emily Johnson,5/28/2024,SAAS,Churned Customer,7,,,1623,30,10/25/2024,9/11/2024
2,Mybuzz,Italy,41.902782,12.496366,Education & Science,Small (11-200),Michael Brown,3/17/2024,SAAS,Churned Customer,7,,,1928,20,3/17/2025,5/11/2024
3,Kare,Germany,52.520008,13.404954,Government Administration Healthcare,Small (11-200),Michael Brown,1/18/2024,SAAS,Churned Customer,7,,,303,50,8/7/2024,5/6/2024
4,Skaboo,Germany,52.520008,13.404954,Energy & Utilities,Small (11-200),Michael Brown,4/6/2024,SAAS,Churned Customer,7,,,1911,30,10/11/2024,7/25/2024
