In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the two input tables from CSV files in the working directory.
customers = pd.read_csv('customers.csv')
pricing = pd.read_csv('pricing.csv')

# Preview the first rows of each table to check that they loaded as expected.
print(customers.head())
print(pricing.head())

# Join the tables on 'customer_id' (assumed to be present in both files).
# pd.merge defaults to an inner join, so customers without a matching
# pricing row (and vice versa) are dropped from `data`.
data = pd.merge(customers, pricing, on='customer_id')

# Summary statistics of the raw pricing table (before the merge).
print("Descriptive Statistics for Pricing:")
print(pricing.describe())

# Price distribution for each churn value.
plt.figure(figsize=(10, 6))
sns.boxplot(x='churn', y='price', data=data)
plt.title('Price vs. Churn')
plt.xlabel('Churn')
plt.ylabel('Price')
plt.show()

# Mean of 'churn' per region. This is a churn *rate* only if 'churn' is
# encoded as 0/1 or boolean -- TODO confirm against the data.
churn_by_region = data.groupby('region')['churn'].mean().reset_index()
plt.figure(figsize=(12, 8))
sns.barplot(x='region', y='churn', data=churn_by_region)
plt.title('Churn Rate by Region')
plt.xlabel('Region')
plt.ylabel('Churn Rate')
plt.xticks(rotation=45)
# plt.show must be *called*; the bare name only echoes the function object
# as the cell's output and never explicitly renders the figure.
plt.show()


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Read the three source tables from CSV files in the working directory.
customer_data = pd.read_csv('customer_data.csv')
price_data = pd.read_csv('price_data.csv')
churn_data = pd.read_csv('churn_data.csv')

# Show a heading followed by the first rows of each table so that the
# column layout of every dataset can be inspected.
for heading, frame in (
    ("Customer Data:", customer_data),
    ("Price Data:", price_data),
    ("Churn Data:", churn_data),
):
    print(heading)
    display(frame.head())


In [None]:
# Column dtypes of each table. Only the first heading has no leading
# newline; the others start with one to leave a blank line between sections.
dtype_sections = (
    ("Customer Data Types:", customer_data),
    ("\nPrice Data Types:", price_data),
    ("\nChurn Data Types:", churn_data),
)
for heading, frame in dtype_sections:
    print(heading)
    print(frame.dtypes)

# Summary statistics from DataFrame.describe(), rendered with display().
summary_sections = (
    ("\nCustomer Data Descriptive Statistics:", customer_data),
    ("\nPrice Data Descriptive Statistics:", price_data),
    ("\nChurn Data Descriptive Statistics:", churn_data),
)
for heading, frame in summary_sections:
    print(heading)
    display(frame.describe())


In [None]:
# Count of missing entries per column, reported table by table.
# isna() is the same check as isnull() under its other pandas name.
missing_sections = (
    ("\nMissing Values in Customer Data:", customer_data),
    ("\nMissing Values in Price Data:", price_data),
    ("\nMissing Values in Churn Data:", churn_data),
)
for heading, frame in missing_sections:
    print(heading)
    print(frame.isna().sum())


In [None]:
def plot_distribution(values, title, xlabel):
    """Plot a histogram with a KDE overlay for one column and show it.

    Draws on a fresh 12x6 figure using 30 bins; the y-axis is always
    labelled 'Frequency'.

    Parameters
    ----------
    values : the data to plot (here: a single DataFrame column).
    title : figure title.
    xlabel : label for the x-axis.
    """
    plt.figure(figsize=(12, 6))
    sns.histplot(values, kde=True, bins=30)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel('Frequency')
    plt.show()


# Visualizing distributions of columns in customer data
plot_distribution(customer_data['usage'], 'Usage Distribution', 'Usage')

# customer_data was loaded by read_csv without parse_dates, so
# 'sign_up_date' is presumably still text -- TODO confirm. Parse it first so
# the histogram bins along a real time axis instead of treating every
# distinct date string as its own category. pd.to_datetime raises on values
# it cannot parse, which surfaces bad dates rather than hiding them.
sign_up_dates = pd.to_datetime(customer_data['sign_up_date'])
plot_distribution(sign_up_dates, 'Sign Up Date Distribution', 'Sign Up Date')

# Visualizing distributions of columns in price data
plot_distribution(
    price_data['variable_pricing'],
    'Variable Pricing Distribution',
    'Variable Pricing',
)
plot_distribution(
    price_data['fixed_pricing'],
    'Fixed Pricing Distribution',
    'Fixed Pricing',
)

# Visualizing churn data: number of rows per distinct 'churn' value.
plt.figure(figsize=(6, 4))
sns.countplot(x='churn', data=churn_data)
plt.title('Churn Distribution')
plt.xlabel('Churn')
plt.ylabel('Count')
plt.show()


In [None]:
import pandas as pd

# Load the dataset.
# Assumes a CSV file in the same directory as this notebook. The file name
# and every column name used below are placeholders from the template and
# must be replaced with the real ones.
df = pd.read_csv('your_dataset.csv')

# Inspect the dataset
print("Initial Dataset Shape:", df.shape)
print(df.head())

# 1. Remove irrelevant columns
# 'irrelevant_column' is a placeholder; drop() raises KeyError if a listed
# column does not exist. Reassigning instead of inplace=True keeps the step
# explicit about which frame it produces.
columns_to_remove = ['irrelevant_column']
df = df.drop(columns=columns_to_remove)

# 2. Create new features from existing columns
# Parse the (placeholder) date column once and derive year/month/day from
# the parsed values, rather than re-parsing the same column for each part.
parsed_dates = pd.to_datetime(df['date_column'])
df['year'] = parsed_dates.dt.year
df['month'] = parsed_dates.dt.month
df['day'] = parsed_dates.dt.day

# 3. Combine some columns together to create better columns
# Example: 'combined_feature' is 'feature1' + 'feature2' applied row by row
# (numeric addition or string concatenation, depending on the column dtypes).
df['combined_feature'] = df['feature1'] + df['feature2']

# 4. Combine datasets (if applicable)
# Assuming we have another dataset to merge on 'common_key'
# df_other = pd.read_csv('other_dataset.csv')
# df = pd.merge(df, df_other, on='common_key', how='left')

# Inspect the transformed dataset
print("Transformed Dataset Shape:", df.shape)
print(df.head())

# Save the transformed dataset (index=False keeps the row index out of the file).
df.to_csv('transformed_dataset.csv', index=False)
